Upload 64 files
This view is limited to 50 files because it contains too many changes.
- .gitattributes +10 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md +55 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +365 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md +55 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +365 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png +3 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py +11 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py +44 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py +10 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc +0 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py +318 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py +157 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py +80 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py +119 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py +555 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py +663 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py +235 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py +385 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py +32 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py +593 -0
- model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py +38 -0
- model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md +55 -0
- model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png +3 -0
- model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt +31 -0
- model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp +365 -0
- model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin +3 -0
- model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
+model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md
ADDED
@@ -0,0 +1,55 @@
## Model Information
### Source model
- Input shape: 1x3x512x1024
- Number of parameters: 18.04M
- Model size: 69.4MB
- Output shape: 1x19x64x128

Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation
Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN package that matches the backend above, e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
#### python
```bash
cd python
python3 demo_qnn.py
```

#### c++
```bash
cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
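For reference, a minimal post-processing sketch in Python (an illustration only, not part of the uploaded files; it assumes a PyTorch/NumPy environment and mirrors the steps in `python/demo_qnn.py`): upsample the 1x19x64x128 logits back to the 512x1024 input resolution, then take the per-pixel argmax over the 19 classes.

```python
import numpy as np
import torch
import torch.nn.functional as F

# Placeholder tensor standing in for the model output (1x19x64x128, NCHW).
logits = torch.from_numpy(np.zeros((1, 19, 64, 128), dtype=np.float32))

# Upsample to the 512x1024 network input resolution.
upsampled = F.interpolate(logits, size=(512, 1024), mode="bilinear", align_corners=False)

# Per-pixel class map; softmax is monotonic, so argmax over raw logits yields the same labels.
pred = upsampled.argmax(dim=1)[0].numpy().astype(np.uint8)  # shape (512, 1024), values 0..18
```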
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png
ADDED
(binary image, tracked with Git LFS)
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt
ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS "  version: ${OpenCV_VERSION}")
message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp
ADDED
@@ -0,0 +1,365 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>   // for memcpy
#include <chrono>    // for timing the invocations
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes)
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW with n=1, i.e. CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Extract the index of the highest-scoring class per pixel
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode class indices into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    cv::Mat input_image = frame.clone();
    // Initialize vectors to hold respective outputs while unwrapping detections.
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<cv::Mat> masks;
    std::vector<float> class_scores;
    cv::RNG rng;
    cv::Mat masked_img;

    // Rearrange the NHWC output (1x64x128x19) into NCHW (1x19x64x128)
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1 * 64 * 128 * 19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64 * 128, CV_32F, format_data);
    std::cout << "proto_buffer dimensions: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel predicted class
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to RGB
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
    delete[] format_data;  // release the transposed buffer

    // Convert color channels (RGB -> BGR) when the decoded map is 3-channel
    if (channels == 3) {
        cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
    }
    cv::Mat resized_cubic;
    cv::resize(image, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    return resized_cubic;
}


int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1,3,model_h,model_w}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }

    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
           args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
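The `transpose` helper above rearranges the interpreter's NHWC output (1x64x128x19) into NCHW (1x19x64x128) before interpolation and softmax. A quick NumPy sketch of the same permutation (illustrative only, not part of the upload):

```python
import numpy as np

# NHWC output as returned by the interpreter: (batch, height, width, classes).
out_nhwc = np.zeros((1, 64, 128, 19), dtype=np.float32)

# Equivalent of transpose(..., tsp_dims = {0, 3, 1, 2}) in run_test.cpp: move classes in front.
out_nchw = np.transpose(out_nhwc, (0, 3, 1, 2))
assert out_nchw.shape == (1, 19, 64, 128)
```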
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9ce910deb26ab4b1c9fb1c77e37b12b913473b18ac59c9ca0b45d65f212292d2
size 18336944
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png
ADDED
(binary image, tracked with Git LFS)
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py
ADDED
@@ -0,0 +1,133 @@
import numpy as np
import torch
import cv2
import sys
import time
import aidlite
import os


def decode_segmap(image, nc=19):
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


class ffnet54sQnn:
    def __init__(self):
        super().__init__()
        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
        if self.model is None:
            print("Create model failed !")
            return

        self.config = aidlite.Config.create_instance()
        if self.config is None:
            print("Create config failed !")
            return

        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
        self.config.is_quantify_model = 1

        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
            return
        input_shapes = [[1, 512, 1024, 3]]
        output_shapes = [[1, 64, 128, 19]]
        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                                        output_shapes, aidlite.DataType.TYPE_FLOAT32)

        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")

        print("model load success!")

    def __call__(self, input):
        self.interpreter.set_input_tensor(0, input)
        invoke_time = []
        invoke_nums = 10
        for i in range(invoke_nums):
            result = self.interpreter.set_input_tensor(0, input.data)
            if result != 0:
                print("interpreter set_input_tensor() failed")
            t1 = time.time()
            result = self.interpreter.invoke()
            cost_time = (time.time() - t1) * 1000
            invoke_time.append(cost_time)

        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / invoke_nums
        var_invoketime = np.var(invoke_time)
        print("====================================")
        print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("====================================")
        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64, 128, 19).copy()
        return features_0


ffnet_segm = ffnet54sQnn()

frame_ct = 0
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "2.png")

image = cv2.imread(image_path)
image = cv2.resize(image, (1024, 512))
frame = np.ascontiguousarray(image[:, :, ::-1])

mean_data = [123.675, 116.28, 103.53]
std_data = [58.395, 57.12, 57.375]
img_input = (frame - mean_data) / std_data  # HWC
input = img_input.astype(np.float32)
input = input[np.newaxis, ...]
input_size = input.shape[1], input.shape[2]  # H, W
t0 = time.time()
out = ffnet_segm(input)
use_time = round((time.time() - t0) * 1000, 2)
print(f"segmentation inference_time:{use_time} ms")
out = np.transpose(out, (0, 3, 1, 2))
out = torch.from_numpy(out)

output = torch.nn.functional.interpolate(
    out, size=input_size, mode="bilinear", align_corners=False
)
output_data = torch.nn.functional.softmax(output, dim=1).data
max_probs, predictions = output_data.max(1)


prediction = predictions.numpy().astype(np.uint8)
test = decode_segmap(prediction[0])

cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
ffnet_segm.interpreter.destory()
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md
ADDED
@@ -0,0 +1,55 @@
## Model Information
### Source model
- Input shape: 1x3x512x1024
- Number of parameters: 18.04M
- Model size: 69.4MB
- Output shape: 1x19x64x128

Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: W8A16
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation
Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN package that matches the backend above, e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
#### python
```bash
cd python
python3 demo_qnn.py
```

#### c++
```bash
cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
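The demos normalize the input image before inference. A minimal preprocessing sketch (an illustration only, assuming OpenCV and NumPy; `2.png` is the sample image shipped next to the demo, and the mean/std values mirror `python/demo_qnn.py`):

```python
import cv2
import numpy as np

# Load and resize the sample image to the 1024x512 model input size.
image = cv2.imread("2.png")
image = cv2.resize(image, (1024, 512))
rgb = np.ascontiguousarray(image[:, :, ::-1])  # BGR -> RGB

# Per-channel normalization on the 0-255 scale, as in demo_qnn.py.
mean = np.array([123.675, 116.28, 103.53])
std = np.array([58.395, 57.12, 57.375])
tensor = ((rgb - mean) / std).astype(np.float32)[np.newaxis, ...]  # NHWC, shape (1, 512, 1024, 3)
```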
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png
ADDED
(binary image, tracked with Git LFS)
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt
ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS "  version: ${OpenCV_VERSION}")
message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp
ADDED
@@ -0,0 +1,365 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring>   // for memcpy
#include <chrono>    // for timing the invocations
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes)
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW with n=1, i.e. CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Extract the index of the highest-scoring class per pixel
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode class indices into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    cv::Mat input_image = frame.clone();
    // Initialize vectors to hold respective outputs while unwrapping detections.
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<cv::Mat> masks;
    std::vector<float> class_scores;
    cv::RNG rng;
    cv::Mat masked_img;

    // Rearrange the NHWC output (1x64x128x19) into NCHW (1x19x64x128)
    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1 * 64 * 128 * 19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64 * 128, CV_32F, format_data);
    std::cout << "proto_buffer dimensions: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel predicted class
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to RGB
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
    delete[] format_data;  // release the transposed buffer

    // Convert color channels (RGB -> BGR) when the decoded map is 3-channel
    if (channels == 3) {
        cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
    }
    cv::Mat resized_cubic;
    cv::resize(image, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    return resized_cubic;
}


int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1,3,model_h,model_w}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }

    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
           args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e3065b3055672fb4f52f561a8ffb6ccb03e501480335f2f5f97d8cfaa6f0a4c
size 72810122
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:597f83804cb9866c784b3d99209ee9e3b8b1f0b4f838c022a934ae5726f58218
size 72423358
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ab3604cdedd3dc8ff34698bd15a197690df0511eae6e4856da89187fe7d17f1
size 18537648
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg
ADDED
(binary image)
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png
ADDED
(binary image, tracked with Git LFS)
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py
ADDED
@@ -0,0 +1,11 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

imagenet_base_path = None
cityscapes_base_path = None
model_weights_base_path = None

CITYSCAPES_MEAN = [0.485, 0.456, 0.406]
CITYSCAPES_STD = [0.229, 0.224, 0.225]
CITYSCAPES_NUM_CLASSES = 19
CITYSCAPES_IGNORE_LABEL = 255
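The 0-1 range constants here correspond to the 0-255 scale values used in the demos: multiplying `CITYSCAPES_MEAN` and `CITYSCAPES_STD` by 255 reproduces the 123.675/58.395-style numbers. A small check (illustrative only):

```python
import numpy as np

CITYSCAPES_MEAN = [0.485, 0.456, 0.406]
CITYSCAPES_STD = [0.229, 0.224, 0.225]

# 0.485 * 255 = 123.675, 0.229 * 255 = 58.395, etc., matching the constants in demo_qnn.py.
print(np.array(CITYSCAPES_MEAN) * 255)  # [123.675 116.28  103.53 ]
print(np.array(CITYSCAPES_STD) * 255)   # [58.395 57.12  57.375]
```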
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py
ADDED
@@ -0,0 +1,133 @@
import numpy as np
import torch
import cv2
import sys
import time
import aidlite
import os


def decode_segmap(image, nc=19):
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    return rgb


class ffnet54sQnn:
    def __init__(self):
        super().__init__()
        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
        if self.model is None:
            print("Create model failed !")
            return

        self.config = aidlite.Config.create_instance()
        if self.config is None:
            print("build_interpretper_from_model_and_config failed !")
            return

        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
        self.config.is_quantify_model = 1

        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
            return
        input_shapes = [[1, 512, 1024, 3]]
        output_shapes = [[1, 64, 128, 19]]
        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
                                        output_shapes, aidlite.DataType.TYPE_FLOAT32)

        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")

        print(" model load success!")

    def __call__(self, input):
        self.interpreter.set_input_tensor(0, input)
        invoke_time = []
        invoke_nums = 10
        for i in range(invoke_nums):
            result = self.interpreter.set_input_tensor(0, input.data)
            if result != 0:
                print("interpreter set_input_tensor() failed")
            t1 = time.time()
            result = self.interpreter.invoke()
            cost_time = (time.time() - t1) * 1000
            invoke_time.append(cost_time)

        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / invoke_nums
        var_invoketime = np.var(invoke_time)
        print("====================================")
        print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("====================================")
        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64, 128, 19).copy()
        return features_0


ffnet_segm = ffnet54sQnn()

frame_ct = 0
image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "2.png")

image = cv2.imread(image_path)
image = cv2.resize(image, (1024, 512))
frame = np.ascontiguousarray(image[:, :, ::-1])

mean_data = [123.675, 116.28, 103.53]
std_data = [58.395, 57.12, 57.375]
img_input = (frame - mean_data) / std_data  # HWC
input = img_input.astype(np.float32)
input = input[np.newaxis, ...]
input_size = input.shape[1], input.shape[2]  # H, W
t0 = time.time()
out = ffnet_segm(input)
use_time = round((time.time() - t0) * 1000, 2)
print(f"segmentation inference_time:{use_time} ms")
out = np.transpose(out, (0, 3, 1, 2))
out = torch.from_numpy(out)

output = torch.nn.functional.interpolate(
    out, size=input_size, mode="bilinear", align_corners=False
)
output_data = torch.nn.functional.softmax(output, dim=1).data
max_probs, predictions = output_data.max(1)


prediction = predictions.numpy().astype(np.uint8)
test = decode_segmap(prediction[0])

cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
ffnet_segm.interpreter.destory()
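Note: the demo transposes the NHWC output to NCHW and uses torch for bilinear upsampling and softmax before taking the per-pixel argmax. Because argmax is unchanged by softmax, the class map alone can also be recovered with NumPy and OpenCV only; a minimal illustrative sketch (assumes `out` is the (1, 64, 128, 19) array returned by the interpreter; it resizes class ids with nearest-neighbour interpolation rather than upsampling logits, so results can differ slightly at class boundaries; not part of the upload):

import numpy as np
import cv2

def class_map_from_logits(out, out_size=(1024, 512)):
    # out: (1, 64, 128, 19) NHWC logits from the QNN context binary
    pred = np.argmax(out[0], axis=-1).astype(np.uint8)  # (64, 128) class ids
    # nearest-neighbour resize keeps the ids valid; dsize is (width, height)
    return cv2.resize(pred, out_size, interpolation=cv2.INTER_NEAREST)
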
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py
ADDED
@@ -0,0 +1,44 @@
import numpy as np
import torch
import os
import sys
from typing import Callable, Tuple
from models import resnet
from models.ffnet_blocks import create_ffnet
torch.set_grad_enabled(False)


def segmentation_ffnet54S_dBBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet54S_dBBB_mobile",
        backbone=resnet.Resnet54S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth"),
        strict_loading=True,
    )


ffnet54_dbbb = segmentation_ffnet54S_dBBB_mobile()


num_params = sum(p.numel() for p in ffnet54_dbbb.parameters() if p.requires_grad)
print(f'Number of FFNet-54S parameters: {num_params}')


ffnet_seg = ffnet54_dbbb

seg_d_in = torch.randn(1, 3, 512, 1024, dtype=torch.float32)

source_model = torch.jit.trace(ffnet_seg, seg_d_in)
source_model.save("ffnet54S_dBBB_cityscapes_state_dict_quarts.pt")
print("export FFNet-54S TorchScript ok!")
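Note: a quick way to sanity-check the exported TorchScript file is to load it back and run a dummy forward pass. A minimal illustrative sketch (assumes the .pt file saved above is in the working directory and that the traced head returns a single tensor; not part of the upload):

import torch

# load the traced model back and run a dummy 512x1024 forward pass
ts_model = torch.jit.load("ffnet54S_dBBB_cityscapes_state_dict_quarts.pt")
ts_model.eval()
with torch.no_grad():
    out = ts_model(torch.randn(1, 3, 512, 1024))
# demo_qnn.py declares a (1, 64, 128, 19) NHWC output for the converted context binary,
# so the NCHW TorchScript output is expected to be (1, 19, 64, 128)
print(out.shape)
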
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py
ADDED
@@ -0,0 +1,10 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

from .ffnet_S_mobile import *
from .ffnet_NS_mobile import *
from .ffnet_gpu_large import *
from .ffnet_S_gpu_large import *
from .ffnet_N_gpu_large import *
from .ffnet_gpu_small import *
from .ffnet_S_gpu_small import *
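Note: every model file imported above decorates its factory functions with @register_model from models.model_registry; only the compiled .pyc of that module appears in this part of the upload. Such registries are typically a name-to-factory dictionary filled by the decorator; a hypothetical sketch for illustration only (an assumption, not the actual models/model_registry.py):

# hypothetical minimal registry, for illustration only
model_entrypoints = {}

def register_model(fn):
    # store the factory under its function name, e.g. "segmentation_ffnet54S_dBBB_mobile"
    model_entrypoints[fn.__name__] = fn
    return fn

def model_entrypoint(model_name):
    # look a factory up by name so models can be built from a string
    return model_entrypoints[model_name]
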
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (366 Bytes). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc
ADDED
Binary file (5.52 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc
ADDED
Binary file (2.88 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc
ADDED
Binary file (1.68 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc
ADDED
Binary file (2.33 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc
ADDED
Binary file (9.49 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc
ADDED
Binary file (13.6 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc
ADDED
Binary file (4.33 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc
ADDED
Binary file (7.12 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc
ADDED
Binary file (758 Bytes). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc
ADDED
Binary file (13.6 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.07 kB). View file
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py
ADDED
@@ -0,0 +1,318 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models.utils import model_weight_initializer
from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 3-Stage Mobile FFNets trained for 1024x512 images, outputing segmentation maps of
##### 256x128 pixels. These models are intended for use with the
##### cityscapes evaluation script, which uses image sizes of 2048x1024
##########################################################################################
@register_model
def segmentation_ffnet122NS_CBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet122NS_CBB_mobile_pre_down",
        backbone=resnet.Resnet122NS,
        pre_downsampling=True,  # Downsample the incoming image, before passing it to the network
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet74NS_CBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet74NS_CBB_mobile_pre_down",
        backbone=resnet.Resnet74NS,
        pre_downsampling=True,  # Downsample the incoming image, before passing it to the network
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet46NS_CBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet46NS_CBB_mobile_pre_down",
        backbone=resnet.Resnet46NS,
        pre_downsampling=True,  # Downsample the incoming image, before passing it to the network
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet122NS_CCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet122NS_CCC_mobile_pre_down",
        backbone=resnet.Resnet122NS,
        pre_downsampling=True,  # Downsample the incoming image, before passing it to the network
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet74NS_CCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet74NS_CCC_mobile_pre_down",
        backbone=resnet.Resnet74NS,
        pre_downsampling=True,  # Downsample the incoming image, before passing it to the network
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet46NS_CCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet46NS_CCC_mobile_pre_down",
        backbone=resnet.Resnet46NS,
        pre_downsampling=True,  # Downsample the incoming image, before passing it to the network
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### The **actual** 3-Stage Mobile FFNets to export / use with 1024x512 images directly,
##### and output a segmentation map of 256x128 pixels
##########################################################################################
#
@register_model
def segmentation_ffnet122NS_CBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet122NS_CBB_mobile",
        backbone=resnet.Resnet122NS,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet74NS_CBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet74NS_CBB_mobile",
        backbone=resnet.Resnet74NS,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet46NS_CBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet46NS_CBB_mobile",
        backbone=resnet.Resnet46NS,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet122NS_CCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet122NS_CCC_mobile",
        backbone=resnet.Resnet122NS,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet74NS_CCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet74NS_CCC_mobile",
        backbone=resnet.Resnet74NS,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet46NS_CCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet46NS_CCC_mobile",
        backbone=resnet.Resnet46NS,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


##########################################################################################
##### Classification models with an FFNet structure. Primarily intended for imagenet
##### initialization of FFNet.
##### See the README for the hyperparameters for training the classification models
##########################################################################################
@register_model
def classification_ffnet122NS_CBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet122NS_CBX_mobile",
        backbone=resnet.Resnet122NS,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=False,
        strict_loading=True,
        dropout_rate=0.2,
    )


@register_model
def classification_ffnet74NS_CBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet74NS_CBX_mobile",
        backbone=resnet.Resnet74NS,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74NS/ffnet74NS_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=False,
        strict_loading=True,
        dropout_rate=0.2,
    )


@register_model
def classification_ffnet46NS_CBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet46NS_CBX_mobile",
        backbone=resnet.Resnet46NS,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46NS/ffnet46NS_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=False,
        strict_loading=True,
        dropout_rate=0.2,
    )


##########################################################################################
##### This is an example of how these FFNet models, which are intended for 1024x512 images
##### would be initialized for training on cityscapes with 2048x1024 images
##########################################################################################
@register_model
def segmentation_ffnet122NS_CBB_mobile_pre_down_train():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet122NS_CBB_mobile_pre_down",
        backbone=resnet.Resnet122NS,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,  # Strict loading is false here because the weights are going into a model with pre_downsampling=True
    )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py
ADDED
@@ -0,0 +1,157 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 3-Stage GPU FFNets. These are trained for use with image sizes of 2048x1024 and
##### output segmentation maps of size 512x256 pixels
##########################################################################################
@register_model
def segmentation_ffnet122N_CBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet122N_CBB",
        backbone=resnet.Resnet122N,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122N/ffnet122N_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet74N_CBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet74N_CBB",
        backbone=resnet.Resnet74N,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74N/ffnet74N_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet46N_CBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet46N_CBB",
        backbone=resnet.Resnet46N,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46N/ffnet46N_CBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### Classification models with an FFNet structure. Primarily intended for imagenet
##### initialization of FFNet.
##### See the README for the hyperparameters for training the classification models
##########################################################################################
@register_model
def classification_ffnet122N_CBX():
    return create_ffnet(
        ffnet_head_type="B",
        task="classification",
        num_classes=1000,
        model_name="ffnnet122N_CBX",
        backbone=resnet.Resnet122N,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=False,
        strict_loading=True,
        dropout_rate=0.2,
    )


@register_model
def classification_ffnet74N_CBX():
    return create_ffnet(
        ffnet_head_type="B",
        task="classification",
        num_classes=1000,
        model_name="ffnnet74N_CBX",
        backbone=resnet.Resnet74N,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet74N/ffnet74N_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=False,
        strict_loading=True,
        dropout_rate=0.2,
    )


@register_model
def classification_ffnet46N_CBX():
    return create_ffnet(
        ffnet_head_type="B",
        task="classification",
        num_classes=1000,
        model_name="ffnnet46N_CBX",
        backbone=resnet.Resnet46N,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet46N/ffnet46N_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=False,
        strict_loading=True,
        dropout_rate=0.2,
    )


##########################################################################################
##### This is an example of how these FFNet models would be initialized for training on
##### cityscapes with 2048x1024 images
##########################################################################################
@register_model
def segmentation_ffnet122N_CBB_train():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet122N_CBB",
        backbone=resnet.Resnet122N,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,
    )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py
ADDED
@@ -0,0 +1,80 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 4-Stage GPU FFNets with Slim backbone.
##### These are trained for use with image sizes of 2048x1024
##### and output a segmentation map of 512x256 pixels
##########################################################################################
@register_model
def segmentation_ffnet150S_BBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet150S_BBB",
        backbone=resnet.Resnet150S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BBB_gpu_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86S_BBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_BBB",
        backbone=resnet.Resnet86S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBB_gpu_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### This is an example of how these FFNet models would be initialized for training on
##### cityscapes with 2048x1024 images
##########################################################################################
@register_model
def segmentation_ffnet86S_BBB_train():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_BBB",
        backbone=resnet.Resnet86S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,
    )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py
ADDED
@@ -0,0 +1,119 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 4-Stage GPU FFNets with Slim backbone.
##### These are trained for use with image sizes of 2048x1024
##### and output a segmentation map of 256x128 pixels
##########################################################################################
@register_model
def segmentation_ffnet150S_dBBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet150S_dBBB",
        backbone=resnet.Resnet150S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_dBBB_gpu_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86S_dBBB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_dBBB",
        backbone=resnet.Resnet86S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_dBBB_gpu_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### Classification models with an FFNet structure. Primarily intended for imagenet
##### initialization of FFNet.
##### See the README for the hyperparameters for training the classification models
##########################################################################################
@register_model
def classification_ffnet150S_BBX():
    return create_ffnet(
        ffnet_head_type="B",
        task="classification",
        num_classes=1000,
        model_name="ffnnet150S_BBX",
        backbone=resnet.Resnet150S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BBX_gpu_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet86S_BBX():
    return create_ffnet(
        ffnet_head_type="B",
        task="classification",
        num_classes=1000,
        model_name="ffnnet86S_BBX",
        backbone=resnet.Resnet86S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### This is an example of how these FFNet models would be initialized for training on
##### cityscapes with 2048x1024 images
##########################################################################################
@register_model
def segmentation_ffnet86S_dBBB_train():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_dBBB",
        backbone=resnet.Resnet86S_D,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,
    )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py
ADDED
@@ -0,0 +1,555 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 4-Stage Mobile FFNets with Slim backbone.
##### These are trained for use with image sizes of 2048x1024, and output a segmentation map
##### of 256x128 pixels
##########################################################################################
@register_model
def segmentation_ffnet86S_dBBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_dBBB_mobile",
        backbone=resnet.Resnet86S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_dBBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet78S_dBBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet78S_dBBB_mobile",
        backbone=resnet.Resnet78S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_dBBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet54S_dBBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet54S_dBBB_mobile",
        backbone=resnet.Resnet54S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet54S/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet40S_dBBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet40S_dBBB_mobile",
        backbone=resnet.Resnet40S_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet40S/ffnet40S_dBBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### 4-Stage Mobile FFNets with Slim backbone, trained for use with image sizes of 1024x512
##### and output a segmentation map of 256x128 pixels
##### These versions are meant for use with the cityscapes evaluation script, which provides
##### inputs at 2048x1024
##########################################################################################
@register_model
def segmentation_ffnet150S_BBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet150S_BBB_mobile_pre_down",
        backbone=resnet.Resnet150S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86S_BBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_BBB_mobile_pre_down",
        backbone=resnet.Resnet86S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet78S_BBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet78S_BBB_mobile_pre_down",
        backbone=resnet.Resnet78S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet54S_BBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet54S_BBB_mobile_pre_down",
        backbone=resnet.Resnet54S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet40S_BBB_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet40S_BBB_mobile_pre_down",
        backbone=resnet.Resnet40S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet150S_BCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet150S_BCC_mobile_pre_down",
        backbone=resnet.Resnet150S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86S_BCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet86S_BCC_mobile_pre_down",
        backbone=resnet.Resnet86S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet78S_BCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet78S_BCC_mobile_pre_down",
        backbone=resnet.Resnet78S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet54S_BCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet54S_BCC_mobile_pre_down",
        backbone=resnet.Resnet54S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet40S_BCC_mobile_pre_down():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet40S_BCC_mobile_pre_down",
        backbone=resnet.Resnet40S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### 4-Stage Mobile FFNets with Slim backbone.
##### These are the actual models, trained for use with image sizes of 1024x512
##### and output a segmentation map of 256x128 pixels
##### See the versions with _pre_down suffix for models to use with the cityscapes evaluation script
##########################################################################################
@register_model
def segmentation_ffnet150S_BBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet150S_BBB_mobile",
        backbone=resnet.Resnet150S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet86S_BBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86S_BBB_mobile",
        backbone=resnet.Resnet86S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet78S_BBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet78S_BBB_mobile",
        backbone=resnet.Resnet78S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet54S_BBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet54S_BBB_mobile",
        backbone=resnet.Resnet54S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet40S_BBB_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet40S_BBB_mobile",
        backbone=resnet.Resnet40S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet150S_BCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet150S_BCC_mobile",
        backbone=resnet.Resnet150S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet86S_BCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet86S_BCC_mobile",
        backbone=resnet.Resnet86S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet78S_BCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet78S_BCC_mobile",
        backbone=resnet.Resnet78S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet54S_BCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet54S_BCC_mobile",
        backbone=resnet.Resnet54S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


@register_model
def segmentation_ffnet40S_BCC_mobile():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet40S_BCC_mobile",
        backbone=resnet.Resnet40S,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=False,  # Strict loading is false here because the weights come from a model with pre_downsampling=True
    )


##########################################################################################
##### Classification models with an FFNet structure. Primarily intended for imagenet
##### initialization of FFNet.
##### See the README for the hyperparameters for training the classification models
##########################################################################################
@register_model
def classification_ffnet150S_BBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet150S_BBX_mobile",
        backbone=resnet.Resnet150S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150S/ffnet150S_BBX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet86S_BBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet86S_BBX_mobile",
        backbone=resnet.Resnet86S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86S/ffnet86S_BBX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet78S_BBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet78S_BBX_mobile",
        backbone=resnet.Resnet78S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet54S_BBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet54S_BBX_mobile",
        backbone=resnet.Resnet54S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet54S/ffnet54S_BBX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet40S_BBX_mobile():
    return create_ffnet(
        ffnet_head_type="B_mobile",
        task="classification",
        num_classes=1000,
        model_name="ffnnet40S_BBX_mobile",
        backbone=resnet.Resnet40S,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet40S/ffnet40S_BBX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### This is an example of how the FFNet models intended for 1024x512 images
##### would be initialized for training on cityscapes with 2048x1024 images
##### Set up the rest accordingly
##########################################################################################
@register_model
def segmentation_ffnet78S_BCC_mobile_pre_down_train():
    return create_ffnet(
        ffnet_head_type="C_mobile",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet78S_BCC_mobile_pre_down",
        backbone=resnet.Resnet78S,
        pre_downsampling=True,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,  # Strict loading is false here because the weights are going into a model with pre_downsampling=True
    )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py
ADDED
@@ -0,0 +1,663 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

#########################################################################################
# Part of the code in UpBranch adapted from https://github.com/feinanshan/FANet/blob/master/Testing/models/fanet/fanet.py
#
# The original source code was made available under the following license
# MIT License
#
# Copyright (c) 2021 Ping Hu
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# Code for ClassificationHead adapted from https://github.com/HRNet/HRNet-Image-Classification

# The original source code was made available under the following license
# MIT License
# Copyright (c) 2019 Microsoft Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#########################################################################################


import math
import torch
from torch import nn
from torch.nn import functional as F
from models.utils import model_weight_initializer
import torchvision.transforms as T
from scipy import ndimage

# The modules here currently assume that there are always 4 branches.
# It would need to be adapted in order to support a variable number of branches

# TODO : Pass BN momentum through config
BN_MOMENTUM = 0.1
gpu_up_kwargs = {"mode": "bilinear", "align_corners": True}
mobile_up_kwargs = {"mode": "nearest"}
relu_inplace = True

# TODO : Replace functional interpolate operations with upsample modules


class ConvBNReLU(nn.Module):
    def __init__(
        self,
        in_chan,
        out_chan,
        ks=3,
        stride=1,
        padding=1,
        activation=nn.ReLU,
        *args,
        **kwargs,
    ):
        super(ConvBNReLU, self).__init__()
        layers = [
            nn.Conv2d(
                in_chan,
                out_chan,
                kernel_size=ks,
                stride=stride,
                padding=padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM),
        ]
        if activation:
            layers.append(activation(inplace=relu_inplace))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class AdapterConv(nn.Module):
    def __init__(
        self, in_channels=[256, 512, 1024, 2048], out_channels=[64, 128, 256, 512]
    ):
        super(AdapterConv, self).__init__()
        assert len(in_channels) == len(
            out_channels
        ), "Number of input and output branches should match"
        self.adapter_conv = nn.ModuleList()

        for k in range(len(in_channels)):
            self.adapter_conv.append(
                ConvBNReLU(in_channels[k], out_channels[k], ks=1, stride=1, padding=0),
            )

    def forward(self, x):
        out = []
        for k in range(len(self.adapter_conv)):
            out.append(self.adapter_conv[k](x[k]))
        return out


class UpsampleCat(nn.Module):
    def __init__(self, upsample_kwargs=gpu_up_kwargs):
        super(UpsampleCat, self).__init__()
        self._up_kwargs = upsample_kwargs

    def forward(self, x):
        """Upsample and concatenate feature maps."""
        assert isinstance(x, list) or isinstance(x, tuple)
        # print(self._up_kwargs)
        x0 = x[0]
        _, _, H, W = x0.size()
        for i in range(1, len(x)):
            x0 = torch.cat([x0, F.interpolate(x[i], (H, W), **self._up_kwargs)], dim=1)
        return x0


class UpBranch(nn.Module):
    def __init__(
        self,
        in_channels=[64, 128, 256, 512],
        out_channels=[128, 128, 128, 128],
        upsample_kwargs=gpu_up_kwargs,
    ):
        super(UpBranch, self).__init__()

        self._up_kwargs = upsample_kwargs

        self.fam_32_sm = ConvBNReLU(
            in_channels[3], out_channels[3], ks=3, stride=1, padding=1
        )
        self.fam_32_up = ConvBNReLU(
            in_channels[3], in_channels[2], ks=1, stride=1, padding=0
        )
        self.fam_16_sm = ConvBNReLU(
            in_channels[2], out_channels[2], ks=3, stride=1, padding=1
        )
        self.fam_16_up = ConvBNReLU(
            in_channels[2], in_channels[1], ks=1, stride=1, padding=0
        )
        self.fam_8_sm = ConvBNReLU(
            in_channels[1], out_channels[1], ks=3, stride=1, padding=1
        )
        self.fam_8_up = ConvBNReLU(
            in_channels[1], in_channels[0], ks=1, stride=1, padding=0
        )
        self.fam_4 = ConvBNReLU(
            in_channels[0], out_channels[0], ks=3, stride=1, padding=1
        )

        self.high_level_ch = sum(out_channels)
        self.out_channels = out_channels

    def forward(self, x):

        feat4, feat8, feat16, feat32 = x

        smfeat_32 = self.fam_32_sm(feat32)
        upfeat_32 = self.fam_32_up(feat32)

        _, _, H, W = feat16.size()
        x = F.interpolate(upfeat_32, (H, W), **self._up_kwargs) + feat16
        smfeat_16 = self.fam_16_sm(x)
        upfeat_16 = self.fam_16_up(x)

        _, _, H, W = feat8.size()
        x = F.interpolate(upfeat_16, (H, W), **self._up_kwargs) + feat8
        smfeat_8 = self.fam_8_sm(x)
        upfeat_8 = self.fam_8_up(x)

        _, _, H, W = feat4.size()
        smfeat_4 = self.fam_4(
            F.interpolate(upfeat_8, (H, W), **self._up_kwargs) + feat4
        )

        return smfeat_4, smfeat_8, smfeat_16, smfeat_32


class FFNetUpHead(nn.Module):
    def __init__(
        self,
        in_chans,
        use_adapter_conv=True,
        head_type="B_mobile",
        task="segmentation_A",
        num_classes=19,
        base_chans=[64, 128, 256, 512],
        dropout_rate=None,  # Only used for classification
        *args,
        **kwargs,
    ):
        super(FFNetUpHead, self).__init__()
        layers = []
        # base_chans = [64, 128, 128, 128]
        if head_type.startswith("A"):
            base_chans = [64, 128, 256, 512]
        elif head_type.startswith("B"):
            base_chans = [64, 128, 128, 256]
        elif head_type.startswith("C"):
            base_chans = [128, 128, 128, 128]

        if use_adapter_conv:
            layers.append(AdapterConv(in_chans, base_chans))
            in_chans = base_chans[:]

        if head_type == "A":
            layers.append(UpBranch(in_chans))
        elif head_type == "A_mobile":
            layers.append(UpBranch(in_chans, upsample_kwargs=mobile_up_kwargs))
        elif head_type == "B":
            layers.append(UpBranch(in_chans, [96, 96, 64, 32]))
        elif head_type == "B_mobile":
            layers.append(
                UpBranch(in_chans, [96, 96, 64, 32], upsample_kwargs=mobile_up_kwargs)
            )
        elif head_type == "C":
            layers.append(UpBranch(in_chans, [128, 16, 16, 16]))
        elif head_type == "C_mobile":
            layers.append(
                UpBranch(in_chans, [128, 16, 16, 16], upsample_kwargs=mobile_up_kwargs)
            )
        else:
            raise ValueError(f"Unknown FFNetUpHead type {head_type}")

        self.num_features = layers[-1].high_level_ch
        self.num_multi_scale_features = layers[-1].out_channels

        if task.startswith("segmentation"):
            if "mobile" in head_type:
                layers.append(UpsampleCat(mobile_up_kwargs))
            else:
                layers.append(UpsampleCat(gpu_up_kwargs))

            # Gets single scale input
            if "_C" in task:
                mid_feat = 128
                layers.append(
                    SegmentationHead_NoSigmoid_1x1(
                        self.num_features,
                        mid_feat,
                        num_outputs=num_classes,
                    )
                )
            elif "_B" in task:
                mid_feat = 256
                layers.append(
                    SegmentationHead_NoSigmoid_3x3(
                        self.num_features,
                        mid_feat,
                        num_outputs=num_classes,
                    )
                )
            elif "_A" in task:
                mid_feat = 512
                layers.append(
                    SegmentationHead_NoSigmoid_1x1(
                        self.num_features,
                        mid_feat,
                        num_outputs=num_classes,
                    )
                )
            else:
                raise ValueError(f"Unknown Segmentation Head {task}")

        elif task == "classification":
            # Gets multi scale input
            layers.append(
                ClassificationHead(
                    self.num_multi_scale_features,
                    [128, 256, 512, 1024],
                    num_outputs=num_classes,
                    dropout_rate=dropout_rate,
                )
            )
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class SimpleBottleneckBlock(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1):
        super(SimpleBottleneckBlock, self).__init__()
        bn_mom = 0.1
        bn_eps = 1e-5

        self.downsample = None
        if stride != 1 or inplanes != planes * self.expansion:
            self.downsample = nn.Sequential(
                nn.Conv2d(
                    inplanes,
                    planes * self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom),
            )

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=bn_mom)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes, momentum=bn_mom)
        self.conv3 = nn.Conv2d(
            planes, planes * self.expansion, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ClassificationHead(nn.Module):
    def __init__(
        self,
        pre_head_channels,
        head_channels=[128, 256, 512, 1024],
        num_outputs=1,
        dropout_rate=None,
    ):
        super(ClassificationHead, self).__init__()

        self.dropout_rate = dropout_rate
        bn_mom = 0.1
        bn_eps = 1e-5
        head_block_type = SimpleBottleneckBlock
        head_expansion = 4

        expansion_layers = []
        for i, pre_head_channel in enumerate(pre_head_channels):
            expansion_layer = head_block_type(
                pre_head_channel,
                int(head_channels[i] / head_expansion),
            )
            expansion_layers.append(expansion_layer)
        self.expansion_layers = nn.ModuleList(expansion_layers)

        # downsampling modules
        downsampling_layers = []
        for i in range(len(pre_head_channels) - 1):
            input_channels = head_channels[i]
            output_channels = head_channels[i + 1]

            downsampling_layer = nn.Sequential(
                nn.Conv2d(
                    in_channels=input_channels,
                    out_channels=output_channels,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                ),
                nn.BatchNorm2d(output_channels, momentum=bn_mom),
                nn.ReLU(),
            )

            downsampling_layers.append(downsampling_layer)
        self.downsampling_layers = nn.ModuleList(downsampling_layers)

        self.final_layer = nn.Sequential(
            nn.Conv2d(
                in_channels=head_channels[-1],
                out_channels=2048,
                kernel_size=1,
                stride=1,
                padding=0,
            ),
            nn.BatchNorm2d(2048, momentum=bn_mom),
            nn.ReLU(inplace=True),
        )

        self.adaptive_avg_pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(
            2048,
            num_outputs,
        )

    def forward(self, x):

        next_x = self.expansion_layers[0](x[0])
        for i in range(len(self.downsampling_layers)):
            next_x = self.expansion_layers[i + 1](x[i + 1]) + self.downsampling_layers[
                i
            ](next_x)
        x = next_x

        x = self.final_layer(x)
        x = self.adaptive_avg_pool(x).squeeze()

        if self.dropout_rate:
            x = torch.nn.functional.dropout(
                x, p=self.dropout_rate, training=self.training
            )

        x = self.classifier(x)
        return x


class SegmentationHead_NoSigmoid_3x3(nn.Module):
    def __init__(
        self, backbone_channels, mid_channels=256, kernel_size=3, num_outputs=1
    ):
        super(SegmentationHead_NoSigmoid_3x3, self).__init__()
        last_inp_channels = backbone_channels
        self.last_layer = nn.Sequential(
            nn.Conv2d(
                in_channels=last_inp_channels,
                out_channels=mid_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=kernel_size // 2,
            ),
            nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=relu_inplace),
            nn.Conv2d(
                in_channels=mid_channels,
                out_channels=num_outputs,
                kernel_size=kernel_size,
                stride=1,
                padding=kernel_size // 2,
            ),
        )

    def forward(self, x):
        x = self.last_layer(x)
        return x


class SegmentationHead_NoSigmoid_1x1(nn.Module):
    def __init__(
        self, backbone_channels, mid_channels=512, kernel_size=3, num_outputs=1
    ):
        super(SegmentationHead_NoSigmoid_1x1, self).__init__()
        last_inp_channels = backbone_channels
        self.last_layer = nn.Sequential(
            nn.Conv2d(
                in_channels=last_inp_channels,
                out_channels=mid_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=kernel_size // 2,
            ),
            nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=relu_inplace),
            nn.Conv2d(
                in_channels=mid_channels,
                out_channels=num_outputs,
                kernel_size=1,
                stride=1,
                padding=0,
            ),
        )

    def forward(self, x):
        x = self.last_layer(x)
        return x


class GaussianConv2D(nn.Module):
    """
    Gaussian smoothing + downsampling, applied independently per channel
    THIS IS NOT MEANT FOR USE ON MOBILE. MIGHT BE HORRIBLY SLOW
    """

    def __init__(self, channels, kernel_size, sigma, stride=1):
        super(GaussianConv2D, self).__init__()
        assert isinstance(
            kernel_size, int
        ), "Specify kernel size as int. Both dimensions will get the same kernel size"
        assert isinstance(sigma, float), "Specify sigma as float. Anisotropic gaussian"

        kernel = torch.zeros(kernel_size, kernel_size)
        mean_loc = int((kernel_size - 1) / 2)  # Because 0 indexed
        kernel[mean_loc, mean_loc] = 1
        kernel = torch.from_numpy(ndimage.gaussian_filter(kernel.numpy(), sigma=sigma))

        # Make a dwise conv out of the kernel
        # Weights of shape out_channels, in_channels/groups, k, k
        kernel = kernel.view(1, 1, kernel_size, kernel_size)
        kernel = kernel.repeat(channels, 1, 1, 1)

        self.conv = F.conv2d
        # Register the kernel buffer instead of as a parameter, so that the training doesn't
        # happily update it
        self.register_buffer("weight", kernel)
        self.channels = channels
        self.stride = stride

    def forward(self, input):
        return self.conv(
            input, weight=self.weight, groups=self.channels, stride=self.stride
        )


class FFNet(nn.Module):
    def __init__(
        self,
        ffnet_head_type="A",
        num_classes=19,
        task="segmentation_A",
        use_adapter_convs=True,
        backbone=None,
        pre_downsampling=False,
        model_name="default",
        dropout_rate=None,
        **kwargs,
    ):
        super(FFNet, self).__init__()
        self.backbone_model = backbone()
        branch_chans = self.backbone_model.out_channels
        self.use_adapter_convs = use_adapter_convs
        self.ffnet_head_type = ffnet_head_type
        self.task = task
        self.head = FFNetUpHead(
            branch_chans,
            use_adapter_conv=use_adapter_convs,
            head_type=ffnet_head_type,
            num_classes=num_classes,
            task=task,
            dropout_rate=dropout_rate,
        )
        self.model_name = model_name
        # Pre-downsampling is used while training models that use 1024x512 image sizes rather than 2048x1024.
        self.pre_downsampling = pre_downsampling
        if self.pre_downsampling:
            self.smoothing = GaussianConv2D(
                channels=3, kernel_size=5, sigma=0.7, stride=2
            )

    def forward(self, x):
        if self.pre_downsampling:
            x = self.smooth_and_downsample_input(x)
        x = self.backbone_model(x)
        return self.head(x)

    def smooth_and_downsample_input(self, x):
        x = F.pad(x, (0, 0, 1, 1), mode="reflect")
        return self.smoothing(x)

    def init_model(
        self, pretrained_path=None, strict_loading=True, backbone_only=False
    ):
        print(f"Initializing {self.model_name} weights")
        self.apply(model_weight_initializer)
        if pretrained_path:
            pretrained_dict = torch.load(
                pretrained_path, map_location={"cuda:0": "cpu"}
            )
            if backbone_only:
                backbone_dict = {}
                for k, v in pretrained_dict.items():
                    if k.startswith("backbone_model"):
                        backbone_dict[k] = v
                self.load_state_dict(backbone_dict, strict=strict_loading)
            else:
                self.load_state_dict(pretrained_dict, strict=strict_loading)
        else:
            self.backbone_model.load_weights()


def create_ffnet(
    pretrained=True,
    imagenet_backbone_pretrained=True,
    pretrained_weights_path=None,
    pretrained_backbone_only=False,
    ffnet_head_type="A",
    strict_loading=True,
    num_classes=19,
    task="segmentation_A",
    model_name="ffnnet122NS_CCC",
    backbone=None,
    pre_downsampling=False,
    dropout_rate=None,
    **kwargs,
):

    if pretrained_weights_path:
        model_wghts = pretrained_weights_path
        pretrained = True
    if imagenet_backbone_pretrained:
        pretrained = True

    model = FFNet(
        ffnet_head_type=ffnet_head_type,
        num_classes=num_classes,
        task=task,
        use_adapter_convs=True,
        backbone=backbone,
        pre_downsampling=pre_downsampling,
        model_name=model_name,
        dropout_rate=dropout_rate,
    )

    model.apply(model_weight_initializer)
    if pretrained:
        if pretrained_weights_path:
            print("Loading pretrained model state dict from {}".format(model_wghts))
            model.init_model(
                model_wghts,
                strict_loading=strict_loading,
                backbone_only=pretrained_backbone_only,
            )
        else:
            print(
                "No model weights provided, attempting to load imagenet pretrained backbone..."
            )
            model.init_model()

    model.eval()
    return model
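
The FFNetUpHead assembled above can be exercised on its own. The following is a minimal sketch (not part of the uploaded files) that pushes four dummy backbone feature maps, at strides 4/8/16/32 with the channel counts the "A" head assumes, through the head; it assumes this upload's models package and its scipy/torchvision dependencies are importable. The branches are fused by UpsampleCat at the stride-4 branch, so the logits come out at 1/4 of the input resolution.

import torch
from models.ffnet_blocks import FFNetUpHead

# Dummy stride-4/8/16/32 features for a 1024x512 (WxH) input
feats = [
    torch.randn(1, 64, 128, 256),
    torch.randn(1, 128, 64, 128),
    torch.randn(1, 256, 32, 64),
    torch.randn(1, 512, 16, 32),
]
head = FFNetUpHead(
    in_chans=[64, 128, 256, 512],
    head_type="A",
    task="segmentation_A",
    num_classes=19,
)
head.eval()
with torch.no_grad():
    out = head(feats)
print(out.shape)  # torch.Size([1, 19, 128, 256]): 19-class logits at 1/4 resolution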
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py
ADDED
@@ -0,0 +1,235 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 4-Stage GPU FFNets with ResNet backbone.
##### These are trained for use with image sizes of 2048x1024
##### and output a segmentation map of 512x256 pixels
##########################################################################################
@register_model
def segmentation_ffnet150_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet150_AAA",
        backbone=resnet.Resnet150,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet134_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet134_AAA",
        backbone=resnet.Resnet134,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet134/ffnet134_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet101_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet101_AAA",
        backbone=resnet.Resnet101,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet101/ffnet101_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet86_AAA",
        backbone=resnet.Resnet86,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86/ffnet86_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet56_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet56_AAA",
        backbone=resnet.Resnet56,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet56/ffnet56_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet50_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet50_AAA",
        backbone=resnet.Resnet50,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet50/ffnet50_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet34_AAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet34_AAA",
        backbone=resnet.Resnet34,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet34/ffnet34_AAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet150_ABB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet150_ABB",
        backbone=resnet.Resnet150,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_ABB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86_ABB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet86_ABB",
        backbone=resnet.Resnet86,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86/ffnet86_ABB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet56_ABB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet56_ABB",
        backbone=resnet.Resnet56,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet56/ffnet56_ABB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet34_ABB():
    return create_ffnet(
        ffnet_head_type="B",
        task="segmentation_B",
        num_classes=19,
        model_name="ffnnet34_ABB",
        backbone=resnet.Resnet34,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet34/ffnet34_ABB_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### This is an example of how these FFNet models would be initialized for training on
##### cityscapes with 2048x1024 images
##########################################################################################
@register_model
def segmentation_ffnet150_AAA_train():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet150_AAA",
        backbone=resnet.Resnet150,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,
    )
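
The comment block in this file states that these GPU models take 2048x1024 frames and emit 512x256 segmentation maps (output stride 4). A hedged sketch of checking that relationship (not part of the uploaded files; it assumes the corresponding cityscapes checkpoint exists under model_weights_base_path and that models/__init__.py imports this module so the entrypoint is registered):

import torch
from models.model_registry import model_entrypoint

model = model_entrypoint("segmentation_ffnet50_AAA")()  # builds the model and loads its checkpoint
x = torch.randn(1, 3, 1024, 2048)  # NCHW view of a 2048x1024 Cityscapes frame
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # expected torch.Size([1, 19, 256, 512]), i.e. a 512x256 class-logit map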
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py
ADDED
@@ -0,0 +1,385 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import os
from functools import partial

import torch


from models import resnet

import os
import sys
import numpy as np

import torch.nn as nn
import torch._utils
import torch.nn.functional as F

from models.ffnet_blocks import create_ffnet
from models.model_registry import register_model
from config import model_weights_base_path


##########################################################################################
##### 4-Stage GPU FFNets with ResNet backbone.
##### These are trained for use with image sizes of 2048x1024
##### and output a segmentation map of 256x128 pixels
##########################################################################################
@register_model
def segmentation_ffnet150_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet150_dAAA",
        backbone=resnet.Resnet150_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet134_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet134_dAAA",
        backbone=resnet.Resnet134_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet134/ffnet134_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet101_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet101_dAAA",
        backbone=resnet.Resnet101_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet101/ffnet101_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet86_dAAA",
        backbone=resnet.Resnet86_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86/ffnet86_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet56_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet56_dAAA",
        backbone=resnet.Resnet56_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet56/ffnet56_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet50_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet50_dAAA",
        backbone=resnet.Resnet50_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet50/ffnet50_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet34_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet34_dAAA",
        backbone=resnet.Resnet34_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet34/ffnet34_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet18_dAAA():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_A",
        num_classes=19,
        model_name="ffnnet18_dAAA",
        backbone=resnet.Resnet18_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet18/ffnet18_dAAA_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet150_dAAC():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet150_dAAC",
        backbone=resnet.Resnet150_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_dAAC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet86_dAAC():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet86_dAAC",
        backbone=resnet.Resnet86_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86/ffnet86_dAAC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet34_dAAC():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet34_dAAC",
        backbone=resnet.Resnet34_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet34/ffnet34_dAAC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def segmentation_ffnet18_dAAC():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet18_dAAC",
        backbone=resnet.Resnet18_D,
        pre_downsampling=False,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet18/ffnet18_dAAC_cityscapes_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### Classification models with an FFNet structure. Primarily intended for imagenet
##### initialization of FFNet.
##### See the README for the hyperparameters for training the classification models
##########################################################################################
@register_model
def classification_ffnet150_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet150_AAX",
        backbone=resnet.Resnet150,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet134_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet134_AAX",
        backbone=resnet.Resnet134,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet134/ffnet134_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet101_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet101_AAX",
        backbone=resnet.Resnet101,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet101/ffnet101_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet86_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet86_AAX",
        backbone=resnet.Resnet86,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet86/ffnet86_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet56_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet56_AAX",
        backbone=resnet.Resnet56,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet56/ffnet56_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet50_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet50_AAX",
        backbone=resnet.Resnet50,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet50/ffnet50_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet34_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet34_AAX",
        backbone=resnet.Resnet34,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet34/ffnet34_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


@register_model
def classification_ffnet18_AAX():
    return create_ffnet(
        ffnet_head_type="A",
        task="classification",
        num_classes=1000,
        model_name="ffnnet18_AAX",
        backbone=resnet.Resnet18,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet18/ffnet18_AAX_imagenet_state_dict_quarts.pth",
        ),
        strict_loading=True,
    )


##########################################################################################
##### This is an example of how these FFNet models would be initialized for training on
##### cityscapes with 2048x1024 images
##########################################################################################
@register_model
def segmentation_ffnet150_dAAC_train():
    return create_ffnet(
        ffnet_head_type="A",
        task="segmentation_C",
        num_classes=19,
        model_name="ffnnet150_dAAC",
        backbone=resnet.Resnet150_D,
        pretrained_weights_path=os.path.join(
            model_weights_base_path,
            "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth",
        ),
        pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
        strict_loading=False,
    )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py
ADDED
@@ -0,0 +1,32 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import sys

_model_entrypoints = {}


def register_model(fn):
    # lookup containing module
    mod = sys.modules[fn.__module__]
    # add model to __all__ in module
    model_name = fn.__name__
    if hasattr(mod, "__all__"):
        mod.__all__.append(model_name)
    else:
        mod.__all__ = [model_name]

    # add entries to registry dict/sets
    _model_entrypoints[model_name] = fn
    return fn


def model_entrypoint(model_name):
    """Fetch a model entrypoint for specified model name"""
    if model_name in _model_entrypoints:
        return _model_entrypoints[model_name]
    else:
        raise RuntimeError(
            f"Unknown model ({model_name}); known models are: "
            f"{_model_entrypoints.keys()}"
        )
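
register_model and model_entrypoint form a simple name-based factory registry: the decorator records each factory function under its own name (and appends that name to the defining module's __all__), and model_entrypoint looks the factory up again, raising RuntimeError for unknown names. A small self-contained sketch of the mechanics (not part of the uploaded files; my_toy_model is a hypothetical example):

from models.model_registry import register_model, model_entrypoint

@register_model
def my_toy_model():
    return "constructed"

builder = model_entrypoint("my_toy_model")  # fetched by function name
print(builder())  # -> "constructed"
# model_entrypoint("not_registered") would raise RuntimeError listing the known names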
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py
ADDED
@@ -0,0 +1,593 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

#########################################################################
# Code adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py

# The original source code was made available under the following license
# BSD 3-Clause License
#
# Copyright (c) Soumith Chintala 2016,
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#########################################################################

#########################################################################
#### **The main takeaway is that simple FFNets made out of resnet backbones made using basic-block
#### **are just as competitive as complex architectures such as HRNet, DDRNet, FANet etc.

#### New and old ResNet backbones, designed for use with FFNet. These do not have a classification
#### head attached here. ImageNet training of these backbones is done as an FFNet with a classification
#### head attached. See ffnet.py and ffnet_blocks.py.
#### Also, these models do not make a distinction between GPU and mobile because the elements that we change
#### between the two are among the additional modules that FFNet adds.
#########################################################################
import torch

#### These are weights for the backbone when trained directly with a classification head attached at the end of the
#### backbone, and not as part of the FFNet structure. For a minor training accuracy advantage, one could use these
#### weights as the initialization for the relevant models in the new family of models,
#### but training from scratch works nearly equally well
model_paths = {
    "resnet18": "/pretrained_weights/resnet18.pth",
    "resnet34": "/pretrained_weights/resnet34.pth",
    "resnet50": "/pretrained_weights/resnet50.pth",
    "resnet101": "/pretrained_weights/resnet101.pth",
}

import torch.nn as nn
import torch._utils


BN_MOMENTUM = 0.1
relu_inplace = True


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(
        in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
    )


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(
        in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False
    )


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_chan, out_chan, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=relu_inplace)
        self.downsample = None
        if in_chan != out_chan or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM),
            )

    def forward(self, x):

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out_ = shortcut + out
        out_ = self.relu(out_)
        return out_


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_chan, out_chan, stride=1, base_width=64):
        super(Bottleneck, self).__init__()
        width = int(out_chan * (base_width / 64.0)) * 1
        self.conv1 = conv1x1(in_chan, width)
        self.bn1 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM)
        self.conv2 = conv3x3(width, width, stride)
        self.bn2 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM)
        self.conv3 = conv1x1(width, out_chan * self.expansion)
        self.bn3 = nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=relu_inplace)
        self.downsample = None
        if in_chan != out_chan * self.expansion or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(
                    in_chan,
                    out_chan * self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                ),
                nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM),
            )

    def forward(self, x):

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out_ = shortcut + out
        out_ = self.relu(out_)

        return out_


##########################################################################################
##### Vanilla ResNets, but with a more filled out model space, and primarily using basic blocks
##########################################################################################


class ResNet(nn.Module):
    def __init__(
        self,
        block,
        layers,
        strides,
        pretrained_path=None,
        branch_chans=[64, 128, 256, 512],
    ):
        super(ResNet, self).__init__()
        self.pretrained_path = pretrained_path
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=relu_inplace)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inplanes = 64
        self.layer1 = self._make_layer(
            block, branch_chans[0], bnum=layers[0], stride=strides[0]
        )
        self.layer2 = self._make_layer(
            block, branch_chans[1], bnum=layers[1], stride=strides[1]
        )
        self.layer3 = self._make_layer(
            block, branch_chans[2], bnum=layers[2], stride=strides[2]
        )
        self.layer4 = self._make_layer(
            block, branch_chans[3], bnum=layers[3], stride=strides[3]
        )
        self.out_channels = [x * block.expansion for x in branch_chans]

    def _make_layer(self, block, out_chan, bnum, stride=1):
        layers = [block(self.inplanes, out_chan, stride=stride)]
        self.inplanes = out_chan * block.expansion
        for i in range(bnum - 1):
            layers.append(block(self.inplanes, out_chan, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(self.bn1(x))
        x = self.maxpool(x)

        feat4 = self.layer1(x)
        feat8 = self.layer2(feat4)  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat4, feat8, feat16, feat32

    def load_weights(self, pretrained_path=None):
        if not pretrained_path:
            pretrained_path = self.pretrained_path
        if self.pretrained_path or pretrained_path:
            pretrained_dict = torch.load(
                pretrained_path, map_location={"cuda:0": "cpu"}
            )
            print(f"Loading backbone weights from {pretrained_path} with strict=False")
            print(f"Caution!! Things could silently fail here")
            self.load_state_dict(pretrained_dict, strict=False)
        else:
            print("No backbone weights loaded")


##########################################################################################
##### Vanilla ResNet instantiations
##### The versions marked with _D are not trained on ImageNet, and use the weights from
##### the respective models without a _D in the name
##########################################################################################


def Resnet18_D(**kwargs):
    model = ResNet(BasicBlock, [2, 2, 2, 2], [2, 2, 2, 2])  # , model_paths["resnet18"])
    return model


def Resnet18(**kwargs):
    model = ResNet(BasicBlock, [2, 2, 2, 2], [1, 2, 2, 2])  # , model_paths["resnet18"])
    return model


def Resnet34_D(**kwargs):
    model = ResNet(BasicBlock, [3, 4, 6, 3], [2, 2, 2, 2])  # , model_paths["resnet34"])
    return model


def Resnet34(**kwargs):
    model = ResNet(BasicBlock, [3, 4, 6, 3], [1, 2, 2, 2])  # , model_paths["resnet34"])
    return model


def Resnet50_D(**kwargs):
    model = ResNet(Bottleneck, [3, 4, 6, 3], [2, 2, 2, 2])  # , model_paths["resnet50"])
    return model


def Resnet50(**kwargs):
    model = ResNet(Bottleneck, [3, 4, 6, 3], [1, 2, 2, 2])  # , model_paths["resnet50"])
    return model


# can use model_paths["resnet34"] to initialize the weights here, for instance
def Resnet56_D(**kwargs):
    model = ResNet(BasicBlock, [4, 8, 12, 3], [2, 2, 2, 2])
    return model


def Resnet56(**kwargs):
    model = ResNet(BasicBlock, [4, 8, 12, 3], [1, 2, 2, 2])
    return model


def Resnet86_D(**kwargs):
    model = ResNet(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2])
    return model


def Resnet86(**kwargs):
    model = ResNet(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2])
    return model


def Resnet101_D(**kwargs):
    model = ResNet(
        Bottleneck, [3, 4, 23, 3], [2, 2, 2, 2]
    )  # , model_paths["resnet101"])
    return model


def Resnet101(**kwargs):
    model = ResNet(
        Bottleneck, [3, 4, 23, 3], [1, 2, 2, 2]
    )  # , model_paths["resnet101"])
    return model


def Resnet134_D(**kwargs):
    model = ResNet(BasicBlock, [8, 18, 28, 12], [2, 2, 2, 2])
    return model


def Resnet134(**kwargs):
    model = ResNet(BasicBlock, [8, 18, 28, 12], [1, 2, 2, 2])
    return model


def Resnet150_D(**kwargs):
    model = ResNet(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2])
    return model


def Resnet150(**kwargs):
    model = ResNet(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2])
    return model


##########################################################################################
##### Slim ResNets. Narrower, with a deeper stem
##########################################################################################


class ResNetS(nn.Module):
    def __init__(
        self,
        block,
        layers,
        strides,
        pretrained_path=None,
        branch_chans=[64, 128, 192, 320],
    ):
        super(ResNetS, self).__init__()
        self.pretrained_path = pretrained_path
        self.conv0 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn0 = nn.BatchNorm2d(32, momentum=BN_MOMENTUM)
        self.relu0 = nn.ReLU(inplace=relu_inplace)
        self.conv1 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu1 = nn.ReLU(inplace=relu_inplace)
        self.inplanes = 64
        self.layer1 = self._make_layer(
            block, branch_chans[0], bnum=layers[0], stride=strides[0]
        )
        self.layer2 = self._make_layer(
            block, branch_chans[1], bnum=layers[1], stride=strides[1]
        )
        self.layer3 = self._make_layer(
            block, branch_chans[2], bnum=layers[2], stride=strides[2]
        )
        self.layer4 = self._make_layer(
            block, branch_chans[3], bnum=layers[3], stride=strides[3]
        )
        self.out_channels = [x * block.expansion for x in branch_chans]

    def _make_layer(self, block, out_chan, bnum, stride=1):
        layers = [block(self.inplanes, out_chan, stride=stride)]
        self.inplanes = out_chan * block.expansion
        for i in range(bnum - 1):
            layers.append(block(self.inplanes, out_chan, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv0(x)
        x = self.relu0(self.bn0(x))
        x = self.relu1(self.bn1(self.conv1(x)))

        feat4 = self.layer1(x)
        feat8 = self.layer2(feat4)  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat4, feat8, feat16, feat32

    def load_weights(self, pretrained_path=None):
        if not pretrained_path:
            pretrained_path = self.pretrained_path
        if self.pretrained_path or pretrained_path:
            pretrained_dict = torch.load(
                pretrained_path, map_location={"cuda:0": "cpu"}
            )
            print(f"Loading backbone weights from {pretrained_path} with strict=False")
            print(f"Caution!! Things could silently fail here")
            self.load_state_dict(pretrained_dict, strict=False)
        else:
            print("No backbone weights loaded")


##########################################################################################
##### Slim ResNet Instantiations
##### The versions marked with _D are not trained on ImageNet, and use the weights from
##### the respective models without a _D in the name
##########################################################################################


def Resnet22S_D(**kwargs):
    model = ResNetS(BasicBlock, [2, 3, 3, 2], [2, 2, 2, 2])
    return model


def Resnet22S(**kwargs):
    model = ResNetS(BasicBlock, [2, 3, 3, 2], [1, 2, 2, 2])
    return model


def Resnet30S_D(**kwargs):
    model = ResNetS(BasicBlock, [3, 4, 4, 3], [2, 2, 2, 2])
    return model


def Resnet30S(**kwargs):
    model = ResNetS(BasicBlock, [3, 4, 4, 3], [1, 2, 2, 2])
    return model


def Resnet40S_D(**kwargs):
    model = ResNetS(BasicBlock, [4, 5, 6, 4], [2, 2, 2, 2])
    return model


def Resnet40S(**kwargs):
    model = ResNetS(BasicBlock, [4, 5, 6, 4], [1, 2, 2, 2])
    return model


def Resnet54S_D(**kwargs):
    model = ResNetS(BasicBlock, [5, 8, 8, 5], [2, 2, 2, 2])
    return model


def Resnet54S(**kwargs):
    model = ResNetS(BasicBlock, [5, 8, 8, 5], [1, 2, 2, 2])
    return model


def Resnet78S_D(**kwargs):
    model = ResNetS(BasicBlock, [6, 12, 12, 8], [2, 2, 2, 2])
    return model


def Resnet78S(**kwargs):
    model = ResNetS(BasicBlock, [6, 12, 12, 8], [1, 2, 2, 2])
    return model


def Resnet86S_D(**kwargs):
    model = ResNetS(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2])
    return model


def Resnet86S(**kwargs):
    model = ResNetS(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2])
    return model


def Resnet150S_D(**kwargs):
    model = ResNetS(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2])
    return model


def Resnet150S(**kwargs):
    model = ResNetS(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2])
    return model


##########################################################################################
##### 3 Stage ResNets
##########################################################################################


class ResNetNarrow(nn.Module):
    def __init__(
        self,
        block,
        layers,
        strides,
        pretrained_path=None,
        branch_chans=[64, 96, 160, 320],
    ):
        super(ResNetNarrow, self).__init__()
        self.pretrained_path = pretrained_path
        # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn0 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu0 = nn.ReLU(inplace=relu_inplace)
        self.conv1 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu1 = nn.ReLU(inplace=relu_inplace)
        self.conv2 = nn.Conv2d(
            64, branch_chans[0], kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(branch_chans[0], momentum=BN_MOMENTUM)
        self.relu2 = nn.ReLU(inplace=relu_inplace)
        self.inplanes = branch_chans[0]
        self.layer1 = self._make_layer(
            block, branch_chans[1], bnum=layers[0], stride=strides[0]
        )
        self.layer2 = self._make_layer(
            block, branch_chans[2], bnum=layers[1], stride=strides[1]
        )
        self.layer3 = self._make_layer(
            block, branch_chans[3], bnum=layers[2], stride=strides[2]
        )
        # Always load weights, and re-init from scratch if pre-trained is not specified. A little costly, but less messy
        # self.apply(seg_model_weight_initializer) #For layers not present in the snapshot ??
        # self.load_weights(pretrained_path)
        # branch_chans = [64, 96, 160, 320]
        self.out_channels = [x * block.expansion for x in branch_chans]

    def _make_layer(self, block, out_chan, bnum, stride=1):
        layers = [block(self.inplanes, out_chan, stride=stride)]
        self.inplanes = out_chan * block.expansion
        for i in range(bnum - 1):
            layers.append(block(self.inplanes, out_chan, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv0(x)
        x = self.relu0(self.bn0(x))
        x = self.relu1(self.bn1(self.conv1(x)))
        feat4 = self.relu2(self.bn2(self.conv2(x)))

        feat8 = self.layer1(feat4)  # 1/8
        feat16 = self.layer2(feat8)  # 1/16
        feat32 = self.layer3(feat16)  # 1/32
        return feat4, feat8, feat16, feat32

    def load_weights(self, pretrained_path=None):
        if not pretrained_path:
            pretrained_path = self.pretrained_path
        if self.pretrained_path or pretrained_path:
            pretrained_dict = torch.load(
                pretrained_path, map_location={"cuda:0": "cpu"}
            )
            print(f"Loading backbone weights from {pretrained_path} with strict=False")
            print(f"Caution!! Things could silently fail here")
            self.load_state_dict(pretrained_dict, strict=False)
        else:
            print("No backbone weights loaded")


##########################################################################################
##### 3 Stage ResNet Instantiations
##### These backbones do not differ between imagenet and cityscapes
##########################################################################################


def Resnet122N(**kwargs):
    model = ResNetNarrow(
        BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 96, 160, 320]
    )
    return model


def Resnet74N(**kwargs):
    model = ResNetNarrow(
        BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 96, 160, 320]
    )
    return model


def Resnet46N(**kwargs):
    model = ResNetNarrow(
        BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 96, 160, 320]
    )
    return model


def Resnet122NS(**kwargs):
    model = ResNetNarrow(
        BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 64, 128, 256]
    )
    return model


def Resnet74NS(**kwargs):
    model = ResNetNarrow(
        BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 64, 128, 256]
    )
    return model


def Resnet46NS(**kwargs):
    model = ResNetNarrow(
        BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 64, 128, 256]
    )
    return model
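The factory functions above only build the feature extractor and return the four multi-scale feature maps. A minimal sketch of a sanity check (not part of the repository; it assumes this file is importable as `models.resnet`, and the 512x1024 input size mirrors the Cityscapes resolution used by the demos in this model farm):

```python
# Sketch: instantiate the FFNet-54S backbone and inspect its multi-scale outputs.
import torch
from models.resnet import Resnet54S  # assumed import path for this repo's python/ folder

backbone = Resnet54S()
# Optionally warm-start from a classification checkpoint (path is illustrative only):
# backbone.load_weights("/pretrained_weights/resnet54s.pth")
x = torch.randn(1, 3, 512, 1024)
feat4, feat8, feat16, feat32 = backbone(x)
for name, f in zip(["1/4", "1/8", "1/16", "1/32"], [feat4, feat8, feat16, feat32]):
    print(name, tuple(f.shape))
```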
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py
ADDED
@@ -0,0 +1,38 @@
# Copyright (c) 2022 Qualcomm Technologies, Inc.
# All Rights Reserved.

import torch
from torch import nn
from torch.nn import init
import numpy as np


def dense_kernel_initializer(tensor):
    _, fan_out = nn.init._calculate_fan_in_and_fan_out(tensor)
    init_range = 1.0 / np.sqrt(fan_out)

    return nn.init.uniform_(tensor, a=-init_range, b=init_range)


def model_weight_initializer(m):
    """
    Usage:
        model = Model()
        model.apply(weight_init)
    """
    if isinstance(m, nn.Conv2d):
        # Yes, this non-fancy init is on purpose,
        # and seems to work better in practice for segmentation
        if hasattr(m, "weight"):
            nn.init.normal_(m.weight, std=0.01)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0001)

    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)

    elif isinstance(m, nn.Linear):
        dense_kernel_initializer(m.weight.data)
        if m.bias is not None:
            nn.init.zeros_(m.bias.data)
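As the docstring suggests, `model_weight_initializer` is meant to be passed to `nn.Module.apply`. A short hedged sketch (the backbone used here is just a placeholder; the import paths assume this repo's python/ package layout):

```python
# Sketch: apply the segmentation-friendly init to every submodule of a network.
from models.resnet import Resnet54S
from models.utils import model_weight_initializer

net = Resnet54S()
net.apply(model_weight_initializer)  # recursively initializes Conv2d / BatchNorm2d / Linear layers
```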
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md
ADDED
@@ -0,0 +1,55 @@
## Model Information
### Source model
- Input shape: 1x3x512x1024
- Number of parameters: 18.04M
- Model size: 69.4MB
- Output shape: 1x19x64x128

Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)

### Converted model

- Precision: FP16
- Backend: QNN2.16
- Target Device: SNM972 QCS8550

## Inference with AidLite SDK

### SDK installation
Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN runtime that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK (C++) library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python API version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
#### python
```bash
cd python
python3 demo_qnn.py
```

#### c++
```bash
cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp
mkdir build && cd build
cmake ..
make
./run_test
```
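Both demos feed the model a 1024x512 float image normalized with the constants used in the C++ listing further down (means 123.675/116.28/103.53, stds 58.395/57.12/57.375). A minimal NumPy sketch of that preprocessing, for reference only (demo_qnn.py may differ in detail; the file name is the sample image shipped with the demo):

```python
# Sketch of the demo's input preprocessing: resize, normalize, lay out as NCHW.
import cv2
import numpy as np

img = cv2.imread("2.png")                      # sample image bundled with the demo
img = cv2.resize(img, (1024, 512)).astype(np.float32)
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
img = (img - mean) / std                       # per-channel normalization
input_tensor = img.transpose(2, 0, 1)[None]    # 1x3x512x1024, matching the README's input shape
```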
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png
ADDED
(binary image file, stored with Git LFS)
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt
ADDED
@@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.5)
project("run_test")

find_package(OpenCV REQUIRED)

message(STATUS "OpenCV library status:")
message(STATUS "> version: ${OpenCV_VERSION}")
message(STATUS "> include: ${OpenCV_INCLUDE_DIRS}")

set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")

include_directories(
    /usr/local/include
    /usr/include/opencv4
)

link_directories(
    /usr/local/lib/
)

file(GLOB SRC_LISTS
    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
)

add_executable(run_test ${SRC_LISTS})

target_link_libraries(run_test
    aidlite
    ${OpenCV_LIBS}
    pthread
)
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp
ADDED
@@ -0,0 +1,365 @@
#include <iostream>
#include <string>
#include <algorithm>
#include <cctype>
#include <cstring> // for memcpy
#include <chrono>  // for the inference timing below
#include <opencv2/opencv.hpp>
#include <aidlux/aidlite/aidlite.hpp>
#include <vector>
#include <numeric>
#include <cmath>
#include <array>
#include <cstdint>

using namespace cv;
using namespace std;
using namespace Aidlux::Aidlite;

// Color table (19 classes)
const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
    {0, 0, 0},       // 0=background
    {128, 0, 0},     // 1=aeroplane
    {0, 128, 0},     // 2=bicycle
    {128, 128, 0},   // 3=bird
    {0, 0, 128},     // 4=boat
    {128, 0, 128},   // 5=bottle
    {0, 128, 128},   // 6=bus
    {128, 128, 128}, // 7=car
    {64, 0, 0},      // 8=cat
    {192, 0, 0},     // 9=chair
    {64, 128, 0},    // 10=cow
    {192, 128, 0},   // 11=dining table
    {64, 0, 128},    // 12=dog
    {192, 0, 128},   // 13=horse
    {64, 128, 128},  // 14=motorbike
    {192, 128, 128}, // 15=person
    {0, 64, 0},      // 16=potted plant
    {128, 64, 0},    // 17=sheep
    {0, 192, 0},     // 18=sofa
}};

// Bilinear interpolation (input layout: NCHW, with n=1 reduced to CHW)
std::vector<float> bilinear_interpolate(
    const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {

    std::vector<float> output(target_h * target_w * channels, 0.0f);
    const float scale_h = static_cast<float>(src_h) / target_h;
    const float scale_w = static_cast<float>(src_w) / target_w;

    for (int y = 0; y < target_h; ++y) {
        const float y_src = (y + 0.5f) * scale_h - 0.5f;
        const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
        const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
        const float dy = y_src - y0;

        for (int x = 0; x < target_w; ++x) {
            const float x_src = (x + 0.5f) * scale_w - 0.5f;
            const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
            const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
            const float dx = x_src - x0;

            for (int c = 0; c < channels; ++c) {
                const int src_idx = c * src_h * src_w;
                const float val00 = input[src_idx + y0 * src_w + x0];
                const float val01 = input[src_idx + y0 * src_w + x1];
                const float val10 = input[src_idx + y1 * src_w + x0];
                const float val11 = input[src_idx + y1 * src_w + x1];

                const float val = (1 - dy) * (1 - dx) * val00 +
                                  (1 - dy) * dx * val01 +
                                  dy * (1 - dx) * val10 +
                                  dy * dx * val11;

                output[c * target_h * target_w + y * target_w + x] = val;
            }
        }
    }
    return output;
}

// Softmax over the channel dimension
void softmax(float* data, int height, int width, int channels) {
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_val = -INFINITY;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                max_val = std::max(max_val, data[idx]);
            }

            float sum_exp = 0.0f;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                sum_exp += std::exp(data[idx] - max_val);
            }

            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                data[idx] = std::exp(data[idx] - max_val) / sum_exp;
            }
        }
    }
}

// Extract the index of the highest-scoring class per pixel
std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
    std::vector<uint8_t> pred(height * width);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            float max_prob = -INFINITY;
            uint8_t max_idx = 0;
            for (int c = 0; c < channels; ++c) {
                const int idx = c * height * width + y * width + x;
                if (data[idx] > max_prob) {
                    max_prob = data[idx];
                    max_idx = c;
                }
            }
            pred[y * width + x] = max_idx;
        }
    }
    return pred;
}

// Decode the label map into an RGB image
std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
    std::vector<uint8_t> rgb(height * width * 3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            const int idx = y * width + x;
            const uint8_t label = pred[idx];
            if (label < 19) {
                rgb[idx * 3] = label_colors[label][0];
                rgb[idx * 3 + 1] = label_colors[label][1];
                rgb[idx * 3 + 2] = label_colors[label][2];
            } else {
                rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
            }
        }
    }
    return rgb;
}

struct Args {
    std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin";
    std::string imgs = "../2.png";
    int invoke_nums = 10;
    std::string model_type = "QNN";
};

Args parse_args(int argc, char* argv[]) {
    Args args;
    for (int i = 1; i < argc; ++i) {
        std::string arg = argv[i];
        if (arg == "--target_model" && i + 1 < argc) {
            args.target_model = argv[++i];
        } else if (arg == "--imgs" && i + 1 < argc) {
            args.imgs = argv[++i];
        } else if (arg == "--invoke_nums" && i + 1 < argc) {
            args.invoke_nums = std::stoi(argv[++i]);
        } else if (arg == "--model_type" && i + 1 < argc) {
            args.model_type = argv[++i];
        }
    }
    return args;
}

std::string to_lower(const std::string& str) {
    std::string lower_str = str;
    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
        return std::tolower(c);
    });
    return lower_str;
}

int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){

    int current_coordinate[4] = {0, 0, 0, 0};
    for(int a = 0; a < src_dims[0]; ++a){
        current_coordinate[0] = a;
        for(int b = 0; b < src_dims[1]; ++b){
            current_coordinate[1] = b;
            for(int c = 0; c < src_dims[2]; ++c){
                current_coordinate[2] = c;
                for(int d = 0; d < src_dims[3]; ++d){
                    current_coordinate[3] = d;

                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
                                    current_coordinate[2]*src_dims[3] +
                                    current_coordinate[3];

                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
                                    current_coordinate[tsp_dims[3]];

                    dest[new_index] = src[old_index];
                }
            }
        }
    }

    return EXIT_SUCCESS;
}

cv::Mat post_process(cv::Mat &frame, float* outdata)
{
    cv::Mat input_image = frame.clone();
    // Initialize vectors to hold respective outputs while unwrapping detections.
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<cv::Mat> masks;
    std::vector<float> class_scores;
    cv::RNG rng;
    cv::Mat masked_img;

    unsigned int src_dims[4] = {1, 64, 128, 19};
    unsigned int tsp_dims[4] = {0, 3, 1, 2};
    unsigned int stride_data_num = 1*64*128*19;
    float* format_data = new float[stride_data_num];
    transpose(outdata, src_dims, tsp_dims, format_data);
    cv::Mat proto_buffer(19, 64*128, CV_32F, format_data);
    std::cout << "proto_buffer dimensions: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;

    const int channels = 19;
    int target_h = 64, target_w = 128;
    int src_h = 64, src_w = 128;
    // Step 1: bilinear interpolation
    auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);

    // Step 2: softmax
    softmax(interpolated.data(), target_h, target_w, channels);

    // Step 3: per-pixel predicted class
    auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);

    printf("Processing finished.\n");
    // Step 4: decode to RGB
    std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
    cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));

    // Convert color channels (RGB -> BGR) for OpenCV's imwrite
    if (image.channels() == 3) {
        cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
    }
    cv::Mat resized_cubic;
    cv::resize(image, resized_cubic, cv::Size(1024, 512), 0, 0, cv::INTER_CUBIC);
    delete[] format_data;  // avoid leaking the transposed buffer
    return resized_cubic;
}


int invoke(const Args& args) {
    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
              << "Image Path: " << args.imgs << "\n"
              << "Inference Nums: " << args.invoke_nums << "\n"
              << "Model Type: " << args.model_type << "\n";
    Model* model = Model::create_instance(args.target_model);
    if(model == nullptr){
        printf("Create model failed !\n");
        return EXIT_FAILURE;
    }
    Config* config = Config::create_instance();
    if(config == nullptr){
        printf("Create config failed !\n");
        return EXIT_FAILURE;
    }
    config->implement_type = ImplementType::TYPE_LOCAL;
    std::string model_type_lower = to_lower(args.model_type);
    if (model_type_lower == "qnn"){
        config->framework_type = FrameworkType::TYPE_QNN;
    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
        config->framework_type = FrameworkType::TYPE_SNPE2;
    }
    config->accelerate_type = AccelerateType::TYPE_DSP;
    config->is_quantify_model = 1;

    unsigned int model_h = 512;
    unsigned int model_w = 1024;
    std::vector<std::vector<uint32_t>> input_shapes = {{1, 3, model_h, model_w}};
    std::vector<std::vector<uint32_t>> output_shapes = {{1, 64, 128, 19}};
    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
    if(fast_interpreter == nullptr){
        printf("build_interpretper_from_model_and_config failed !\n");
        return EXIT_FAILURE;
    }
    int result = fast_interpreter->init();
    if(result != EXIT_SUCCESS){
        printf("interpreter->init() failed !\n");
        return EXIT_FAILURE;
    }
    // load model
    result = fast_interpreter->load_model();
    if(result != EXIT_SUCCESS){
        printf("interpreter->load_model() failed !\n");
        return EXIT_FAILURE;
    }
    printf("detect model load success!\n");

    cv::Mat frame = cv::imread(args.imgs);
    if (frame.empty()) {
        printf("detect image load failed!\n");
        return 1;
    }
    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
    cv::Mat input_data;
    cv::Mat frame_clone = frame.clone();
    cv::Scalar stds_scale(58.395, 57.12, 57.375);
    cv::Scalar means_scale(123.675, 116.28, 103.53);
    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
    frame_clone.convertTo(input_data, CV_32F);
    cv::subtract(input_data, means_scale, input_data);
    cv::divide(input_data, stds_scale, input_data);

    float *outdata0 = nullptr;
    std::vector<float> invoke_time;
    for (int i = 0; i < args.invoke_nums; ++i) {
        result = fast_interpreter->set_input_tensor(0, input_data.data);
        if(result != EXIT_SUCCESS){
            printf("interpreter->set_input_tensor() failed !\n");
            return EXIT_FAILURE;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        result = fast_interpreter->invoke();
        auto t2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> cost_time = t2 - t1;
        invoke_time.push_back(cost_time.count() * 1000);
        if(result != EXIT_SUCCESS){
            printf("interpreter->invoke() failed !\n");
            return EXIT_FAILURE;
        }
        uint32_t out_data_0 = 0;
        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
        if(result != EXIT_SUCCESS){
            printf("interpreter->get_output_tensor() 1 failed !\n");
            return EXIT_FAILURE;
        }

    }

    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
    float var_invoketime = 0.0f;
    for (auto time : invoke_time) {
        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
    }
    var_invoketime /= args.invoke_nums;
    printf("=======================================\n");
    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
           args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
    printf("=======================================\n");

    cv::Mat img = post_process(frame, outdata0);
    cv::imwrite("./results.png", img);
    fast_interpreter->destory();
    return 0;
}


int main(int argc, char* argv[]) {
    Args args = parse_args(argc, argv);
    return invoke(args);
}
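For reference, the post-processing in run_test.cpp (transpose the 1x64x128x19 output to channel-first layout, interpolate, softmax, per-pixel argmax, then color-code and upscale) can be expressed compactly in NumPy. A rough sketch, not part of the repository, with `logits` standing for the raw output buffer returned by the interpreter:

```python
# Sketch: NumPy equivalent of the label-map extraction in run_test.cpp.
import numpy as np

def postprocess(logits, num_classes=19):
    chw = np.asarray(logits, dtype=np.float32).reshape(64, 128, num_classes).transpose(2, 0, 1)  # NHWC -> CHW
    # Softmax over the class dimension (the C++ demo's interpolation is 64x128 -> 64x128,
    # i.e. a no-op at this size, so it is omitted here).
    e = np.exp(chw - chw.max(axis=0, keepdims=True))
    probs = e / e.sum(axis=0, keepdims=True)
    pred = probs.argmax(axis=0).astype(np.uint8)  # 64x128 per-pixel class indices
    return pred  # decode_segmap-style coloring and the resize to 1024x512 follow
```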
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e67a07dc0454b16d3363f2b0c92dcc87a10e3dc895fc1571b33bc2df53e3a81
size 36449096
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png
ADDED
(binary image file, stored with Git LFS)