qc903113684 committed on
Commit 0a8a4ff · verified · 1 parent: f6ca579

Upload 64 files

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete set.
Files changed (50)
  1. .gitattributes +10 -0
  2. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md +55 -0
  3. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png +3 -0
  4. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
  5. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +365 -0
  6. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
  7. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png +3 -0
  8. model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
  9. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md +55 -0
  10. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
  11. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
  12. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +365 -0
  13. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt +3 -0
  14. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth +3 -0
  15. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
  16. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg +0 -0
  17. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png +3 -0
  18. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py +11 -0
  19. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
  20. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py +44 -0
  21. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py +10 -0
  22. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc +0 -0
  23. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc +0 -0
  24. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc +0 -0
  25. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc +0 -0
  26. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc +0 -0
  27. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc +0 -0
  28. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc +0 -0
  29. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc +0 -0
  30. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc +0 -0
  31. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc +0 -0
  32. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc +0 -0
  33. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc +0 -0
  34. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py +318 -0
  35. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py +157 -0
  36. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py +80 -0
  37. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py +119 -0
  38. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py +555 -0
  39. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py +663 -0
  40. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py +235 -0
  41. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py +385 -0
  42. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py +32 -0
  43. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py +593 -0
  44. model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py +38 -0
  45. model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md +55 -0
  46. model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png +3 -0
  47. model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt +31 -0
  48. model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp +365 -0
  49. model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin +3 -0
  50. model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
37
+ model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
38
+ model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
39
+ model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
40
+ model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
41
+ model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
42
+ model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
43
+ model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
44
+ model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
45
+ model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters: 18.04M
5
+ - Model size: 69.4MB
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Install the QNN package matching the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # Check the AidLite SDK C++ library version
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # Check the AidLite SDK Python library version
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### c++
49
+ ```bash
50
+ cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
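The README above lists the tensor shapes but not the pre/post-processing. The following is a minimal sketch, assuming the NHWC float32 layout and the mean/std constants used by python/demo_qnn.py further down in this diff; the AidLite calls themselves are omitted, and the helper names are illustrative only.

```python
# Hypothetical pre/post-processing helpers mirroring demo_qnn.py.
import cv2
import numpy as np

MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)  # RGB means
STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)     # RGB stds

def preprocess(path):
    """Return a 1x512x1024x3 float32 tensor from an image file."""
    bgr = cv2.imread(path)
    rgb = cv2.resize(bgr, (1024, 512))[:, :, ::-1].astype(np.float32)
    return ((rgb - MEAN) / STD)[np.newaxis, ...]

def postprocess(logits):
    """Turn the 1x64x128x19 output into a 512x1024 class-index map."""
    pred = np.argmax(logits[0], axis=-1).astype(np.uint8)
    return cv2.resize(pred, (1024, 512), interpolation=cv2.INTER_NEAREST)
```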
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,365 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Color lookup table (19 classes)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout: NCHW with n=1, i.e. CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Pick the class index with the highest probability per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class indices into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: compute per-pixel class predictions
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); channels is 19 here, so this branch never runs
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
+ delete[] format_data; // release the transposed buffer allocated above
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+ int invoke(const Args& args) {
254
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
255
+ << "Image Path: " << args.imgs << "\n"
256
+ << "Inference Nums: " << args.invoke_nums << "\n"
257
+ << "Model Type: " << args.model_type << "\n";
258
+ Model* model = Model::create_instance(args.target_model);
259
+ if(model == nullptr){
260
+ printf("Create model failed !\n");
261
+ return EXIT_FAILURE;
262
+ }
263
+ Config* config = Config::create_instance();
264
+ if(config == nullptr){
265
+ printf("Create config failed !\n");
266
+ return EXIT_FAILURE;
267
+ }
268
+ config->implement_type = ImplementType::TYPE_LOCAL;
269
+ std::string model_type_lower = to_lower(args.model_type);
270
+ if (model_type_lower == "qnn"){
271
+ config->framework_type = FrameworkType::TYPE_QNN;
272
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
273
+ config->framework_type = FrameworkType::TYPE_SNPE2;
274
+ }
275
+ config->accelerate_type = AccelerateType::TYPE_DSP;
276
+ config->is_quantify_model = 1;
277
+
278
+ unsigned int model_h = 512;
279
+ unsigned int model_w = 1024;
280
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,3,model_h,model_w}};
281
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
282
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
283
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
284
+ if(fast_interpreter == nullptr){
285
+ printf("build_interpretper_from_model_and_config failed !\n");
286
+ return EXIT_FAILURE;
287
+ }
288
+ int result = fast_interpreter->init();
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->init() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+ // load model
294
+ result = fast_interpreter->load_model();
295
+ if(result != EXIT_SUCCESS){
296
+ printf("interpreter->load_model() failed !\n");
297
+ return EXIT_FAILURE;
298
+ }
299
+ printf("detect model load success!\n");
300
+
301
+ cv::Mat frame = cv::imread(args.imgs);
302
+ if (frame.empty()) {
303
+ printf("detect image load failed!\n");
304
+ return 1;
305
+ }
306
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
307
+ cv::Mat input_data;
308
+ cv::Mat frame_clone = frame.clone();
309
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
310
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
311
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
312
+ frame_clone.convertTo(input_data, CV_32F);
313
+ cv::subtract(input_data, means_scale, input_data);
314
+ cv::divide(input_data, stds_scale, input_data);
315
+
316
+ float *outdata0 = nullptr;
317
+ std::vector<float> invoke_time;
318
+ for (int i = 0; i < args.invoke_nums; ++i) {
319
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
320
+ if(result != EXIT_SUCCESS){
321
+ printf("interpreter->set_input_tensor() failed !\n");
322
+ return EXIT_FAILURE;
323
+ }
324
+ auto t1 = std::chrono::high_resolution_clock::now();
325
+ result = fast_interpreter->invoke();
326
+ auto t2 = std::chrono::high_resolution_clock::now();
327
+ std::chrono::duration<double> cost_time = t2 - t1;
328
+ invoke_time.push_back(cost_time.count() * 1000);
329
+ if(result != EXIT_SUCCESS){
330
+ printf("interpreter->invoke() failed !\n");
331
+ return EXIT_FAILURE;
332
+ }
333
+ uint32_t out_data_0 = 0;
334
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
335
+ if(result != EXIT_SUCCESS){
336
+ printf("interpreter->get_output_tensor() 1 failed !\n");
337
+ return EXIT_FAILURE;
338
+ }
339
+
340
+ }
341
+
342
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
343
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
344
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
345
+ float var_invoketime = 0.0f;
346
+ for (auto time : invoke_time) {
347
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
348
+ }
349
+ var_invoketime /= args.invoke_nums;
350
+ printf("=======================================\n");
351
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
352
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
353
+ printf("=======================================\n");
354
+
355
+ cv::Mat img = post_process(frame, outdata0);
356
+ cv::imwrite("./results.png", img);
357
+ fast_interpreter->destory();
358
+ return 0;
359
+ }
360
+
361
+
362
+ int main(int argc, char* argv[]) {
363
+ Args args = parse_args(argc, argv);
364
+ return invoke(args);
365
+ }
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce910deb26ab4b1c9fb1c77e37b12b913473b18ac59c9ca0b45d65f212292d2
3
+ size 18336944
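This diff only adds the Git LFS pointer (sha256 oid and size) for the compiled QNN context binary; the binary itself is fetched with `git lfs pull`. A small sketch for checking that the pulled file matches the pointer above — the path and constants are copied from this diff:

```python
# Verify a pulled LFS object against the pointer's sha256/size shown above.
import hashlib
import os

path = ("model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/"
        "ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin")
expected_oid = "9ce910deb26ab4b1c9fb1c77e37b12b913473b18ac59c9ca0b45d65f212292d2"
expected_size = 18336944

assert os.path.getsize(path) == expected_size, "size mismatch (still a pointer file?)"
with open(path, "rb") as f:
    assert hashlib.sha256(f.read()).hexdigest() == expected_oid, "sha256 mismatch"
print("LFS object matches the pointer")
```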
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet54sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet54sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters: 18.04M
5
+ - Model size: 69.4MB
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: W8A16
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Install the QNN package matching the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # Check the AidLite SDK C++ library version
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # Check the AidLite SDK Python library version
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### c++
49
+ ```bash
50
+ cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,365 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Color lookup table (19 classes)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout: NCHW with n=1, i.e. CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Pick the class index with the highest probability per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class indices into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: compute per-pixel class predictions
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); channels is 19 here, so this branch never runs
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
+ delete[] format_data; // release the transposed buffer allocated above
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+ int invoke(const Args& args) {
254
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
255
+ << "Image Path: " << args.imgs << "\n"
256
+ << "Inference Nums: " << args.invoke_nums << "\n"
257
+ << "Model Type: " << args.model_type << "\n";
258
+ Model* model = Model::create_instance(args.target_model);
259
+ if(model == nullptr){
260
+ printf("Create model failed !\n");
261
+ return EXIT_FAILURE;
262
+ }
263
+ Config* config = Config::create_instance();
264
+ if(config == nullptr){
265
+ printf("Create config failed !\n");
266
+ return EXIT_FAILURE;
267
+ }
268
+ config->implement_type = ImplementType::TYPE_LOCAL;
269
+ std::string model_type_lower = to_lower(args.model_type);
270
+ if (model_type_lower == "qnn"){
271
+ config->framework_type = FrameworkType::TYPE_QNN;
272
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
273
+ config->framework_type = FrameworkType::TYPE_SNPE2;
274
+ }
275
+ config->accelerate_type = AccelerateType::TYPE_DSP;
276
+ config->is_quantify_model = 1;
277
+
278
+ unsigned int model_h = 512;
279
+ unsigned int model_w = 1024;
280
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,3,model_h,model_w}};
281
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
282
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
283
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
284
+ if(fast_interpreter == nullptr){
285
+ printf("build_interpretper_from_model_and_config failed !\n");
286
+ return EXIT_FAILURE;
287
+ }
288
+ int result = fast_interpreter->init();
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->init() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+ // load model
294
+ result = fast_interpreter->load_model();
295
+ if(result != EXIT_SUCCESS){
296
+ printf("interpreter->load_model() failed !\n");
297
+ return EXIT_FAILURE;
298
+ }
299
+ printf("detect model load success!\n");
300
+
301
+ cv::Mat frame = cv::imread(args.imgs);
302
+ if (frame.empty()) {
303
+ printf("detect image load failed!\n");
304
+ return 1;
305
+ }
306
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
307
+ cv::Mat input_data;
308
+ cv::Mat frame_clone = frame.clone();
309
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
310
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
311
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
312
+ frame_clone.convertTo(input_data, CV_32F);
313
+ cv::subtract(input_data, means_scale, input_data);
314
+ cv::divide(input_data, stds_scale, input_data);
315
+
316
+ float *outdata0 = nullptr;
317
+ std::vector<float> invoke_time;
318
+ for (int i = 0; i < args.invoke_nums; ++i) {
319
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
320
+ if(result != EXIT_SUCCESS){
321
+ printf("interpreter->set_input_tensor() failed !\n");
322
+ return EXIT_FAILURE;
323
+ }
324
+ auto t1 = std::chrono::high_resolution_clock::now();
325
+ result = fast_interpreter->invoke();
326
+ auto t2 = std::chrono::high_resolution_clock::now();
327
+ std::chrono::duration<double> cost_time = t2 - t1;
328
+ invoke_time.push_back(cost_time.count() * 1000);
329
+ if(result != EXIT_SUCCESS){
330
+ printf("interpreter->invoke() failed !\n");
331
+ return EXIT_FAILURE;
332
+ }
333
+ uint32_t out_data_0 = 0;
334
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
335
+ if(result != EXIT_SUCCESS){
336
+ printf("interpreter->get_output_tensor() 1 failed !\n");
337
+ return EXIT_FAILURE;
338
+ }
339
+
340
+ }
341
+
342
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
343
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
344
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
345
+ float var_invoketime = 0.0f;
346
+ for (auto time : invoke_time) {
347
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
348
+ }
349
+ var_invoketime /= args.invoke_nums;
350
+ printf("=======================================\n");
351
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
352
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
353
+ printf("=======================================\n");
354
+
355
+ cv::Mat img = post_process(frame, outdata0);
356
+ cv::imwrite("./results.png", img);
357
+ fast_interpreter->destory();
358
+ return 0;
359
+ }
360
+
361
+
362
+ int main(int argc, char* argv[]) {
363
+ Args args = parse_args(argc, argv);
364
+ return invoke(args);
365
+ }
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3065b3055672fb4f52f561a8ffb6ccb03e501480335f2f5f97d8cfaa6f0a4c
3
+ size 72810122
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:597f83804cb9866c784b3d99209ee9e3b8b1f0b4f838c022a934ae5726f58218
3
+ size 72423358
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ab3604cdedd3dc8ff34698bd15a197690df0511eae6e4856da89187fe7d17f1
3
+ size 18537648
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg ADDED
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py ADDED
@@ -0,0 +1,11 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ imagenet_base_path = None
5
+ cityscapes_base_path = None
6
+ model_weights_base_path = None
7
+
8
+ CITYSCAPES_MEAN = [0.485, 0.456, 0.406]
9
+ CITYSCAPES_STD = [0.229, 0.224, 0.225]
10
+ CITYSCAPES_NUM_CLASSES = 19
11
+ CITYSCAPES_IGNORE_LABEL = 255
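Note that config.py stores the Cityscapes normalization constants on a 0–1 scale, while demo_qnn.py and run_test.cpp apply them to 0–255 pixel values. The two sets are the same constants scaled by 255, as this quick check (not part of the repo) shows:

```python
# The 0-255 constants in demo_qnn.py are CITYSCAPES_MEAN/STD * 255.
CITYSCAPES_MEAN = [0.485, 0.456, 0.406]
CITYSCAPES_STD = [0.229, 0.224, 0.225]

print([round(m * 255, 3) for m in CITYSCAPES_MEAN])  # [123.675, 116.28, 103.53]
print([round(s * 255, 3) for s in CITYSCAPES_STD])   # [58.395, 57.12, 57.375]
```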
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet54sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet54sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py ADDED
@@ -0,0 +1,44 @@
1
+ import numpy as np
2
+ import torch
3
+ import os
4
+ import sys
5
+ from typing import Callable, Tuple
6
+ from models import resnet
7
+ from models.ffnet_blocks import create_ffnet
8
+ torch.set_grad_enabled(False)
9
+
10
+
11
+
12
+ def segmentation_ffnet54S_dBBB_mobile():
13
+ return create_ffnet(
14
+ ffnet_head_type="B_mobile",
15
+ task="segmentation_B",
16
+ num_classes=19,
17
+ model_name="ffnnet54S_dBBB_mobile",
18
+ backbone=resnet.Resnet54S_D,
19
+ pre_downsampling=False,
20
+ pretrained_weights_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth"),
21
+ strict_loading=True,
22
+ )
23
+
24
+
25
+
26
+
27
+ ffnet54_dbbb = segmentation_ffnet54S_dBBB_mobile()
28
+
29
+
30
+
31
+
32
+ num_params = sum(p.numel() for p in ffnet54_dbbb.parameters() if p.requires_grad)
33
+ print(f'Number of ffnet54S parameters: {num_params}')
34
+
35
+
36
+ ffnet_seg = ffnet54_dbbb
37
+
38
+ seg_d_in = torch.randn(1, 3, 512, 1024,dtype= torch.float32)
39
+
40
+
41
+ source_model = torch.jit.trace(ffnet_seg,seg_d_in)
42
+ source_model.save("ffnet54S_dBBB_cityscapes_state_dict_quarts.pt")
43
+ print("export pose detect ok!")
44
+
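After running export_jit.py, the traced module can be loaded back and sanity-checked against the shapes stated in the README. A short sketch, assuming the .pt file sits in the current directory as saved above:

```python
# Reload the traced FFNet54S model and check its output shape.
import torch

model = torch.jit.load("ffnet54S_dBBB_cityscapes_state_dict_quarts.pt").eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 512, 1024))
print(out.shape)  # expected: torch.Size([1, 19, 64, 128]) per the README
```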
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ from .ffnet_S_mobile import *
5
+ from .ffnet_NS_mobile import *
6
+ from .ffnet_gpu_large import *
7
+ from .ffnet_S_gpu_large import *
8
+ from .ffnet_N_gpu_large import *
9
+ from .ffnet_gpu_small import *
10
+ from .ffnet_S_gpu_small import *
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (366 Bytes).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc ADDED
Binary file (5.52 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc ADDED
Binary file (2.88 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc ADDED
Binary file (1.68 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc ADDED
Binary file (2.33 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc ADDED
Binary file (9.49 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc ADDED
Binary file (13.6 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc ADDED
Binary file (4.33 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc ADDED
Binary file (7.12 kB).
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc ADDED
Binary file (758 Bytes). View file
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc ADDED
Binary file (13.6 kB). View file
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc ADDED
Binary file (1.07 kB). View file
 
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py ADDED
@@ -0,0 +1,318 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ import os
5
+ from functools import partial
6
+
7
+ import torch
8
+
9
+
10
+ from models.utils import model_weight_initializer
11
+ from models import resnet
12
+
13
+ import os
14
+ import sys
15
+ import numpy as np
16
+
17
+ import torch.nn as nn
18
+ import torch._utils
19
+ import torch.nn.functional as F
20
+
21
+ from models.ffnet_blocks import create_ffnet
22
+ from models.model_registry import register_model
23
+ from config import model_weights_base_path
24
+
25
+
26
+ ##########################################################################################
27
+ ##### 3-Stage Mobile FFNets trained for 1024x512 images, outputting segmentation maps of
28
+ ##### 256x128 pixels. These models are intended for use with the
29
+ ##### cityscapes evaluation script, which uses image sizes of 2048x1024
30
+ ##########################################################################################
31
+ @register_model
32
+ def segmentation_ffnet122NS_CBB_mobile_pre_down():
33
+ return create_ffnet(
34
+ ffnet_head_type="B_mobile",
35
+ task="segmentation_B",
36
+ num_classes=19,
37
+ model_name="ffnnet122NS_CBB_mobile_pre_down",
38
+ backbone=resnet.Resnet122NS,
39
+ pre_downsampling=True, # Downsample the incoming image, before passing it to the network
40
+ pretrained_weights_path=os.path.join(
41
+ model_weights_base_path,
42
+ "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth",
43
+ ),
44
+ strict_loading=True,
45
+ )
46
+
47
+
48
+ @register_model
49
+ def segmentation_ffnet74NS_CBB_mobile_pre_down():
50
+ return create_ffnet(
51
+ ffnet_head_type="B_mobile",
52
+ task="segmentation_B",
53
+ num_classes=19,
54
+ model_name="ffnnet74NS_CBB_mobile_pre_down",
55
+ backbone=resnet.Resnet74NS,
56
+ pre_downsampling=True, # Downsample the incoming image, before passing it to the network
57
+ pretrained_weights_path=os.path.join(
58
+ model_weights_base_path,
59
+ "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth",
60
+ ),
61
+ strict_loading=True,
62
+ )
63
+
64
+
65
+ @register_model
66
+ def segmentation_ffnet46NS_CBB_mobile_pre_down():
67
+ return create_ffnet(
68
+ ffnet_head_type="B_mobile",
69
+ task="segmentation_B",
70
+ num_classes=19,
71
+ model_name="ffnnet46NS_CBB_mobile_pre_down",
72
+ backbone=resnet.Resnet46NS,
73
+ pre_downsampling=True, # Downsample the incoming image, before passing it to the network
74
+ pretrained_weights_path=os.path.join(
75
+ model_weights_base_path,
76
+ "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth",
77
+ ),
78
+ strict_loading=True,
79
+ )
80
+
81
+
82
+ @register_model
83
+ def segmentation_ffnet122NS_CCC_mobile_pre_down():
84
+ return create_ffnet(
85
+ ffnet_head_type="C_mobile",
86
+ task="segmentation_C",
87
+ num_classes=19,
88
+ model_name="ffnnet122NS_CCC_mobile_pre_down",
89
+ backbone=resnet.Resnet122NS,
90
+ pre_downsampling=True, # Downsample the incoming image, before passing it to the network
91
+ pretrained_weights_path=os.path.join(
92
+ model_weights_base_path,
93
+ "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth",
94
+ ),
95
+ strict_loading=True,
96
+ )
97
+
98
+
99
+ @register_model
100
+ def segmentation_ffnet74NS_CCC_mobile_pre_down():
101
+ return create_ffnet(
102
+ ffnet_head_type="C_mobile",
103
+ task="segmentation_C",
104
+ num_classes=19,
105
+ model_name="ffnnet74NS_CCC_mobile_pre_down",
106
+ backbone=resnet.Resnet74NS,
107
+ pre_downsampling=True, # Downsample the incoming image, before passing it to the network
108
+ pretrained_weights_path=os.path.join(
109
+ model_weights_base_path,
110
+ "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth",
111
+ ),
112
+ strict_loading=True,
113
+ )
114
+
115
+
116
+ @register_model
117
+ def segmentation_ffnet46NS_CCC_mobile_pre_down():
118
+ return create_ffnet(
119
+ ffnet_head_type="C_mobile",
120
+ task="segmentation_C",
121
+ num_classes=19,
122
+ model_name="ffnnet46NS_CCC_mobile_pre_down",
123
+ backbone=resnet.Resnet46NS,
124
+ pre_downsampling=True, # Downsample the incoming image, before passing it to the network
125
+ pretrained_weights_path=os.path.join(
126
+ model_weights_base_path,
127
+ "ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth",
128
+ ),
129
+ strict_loading=True,
130
+ )
131
+
132
+
133
+ ##########################################################################################
134
+ ##### The **actual** 3-Stage Mobile FFNets to export / use with 1024x512 images directly,
135
+ ##### and output a segmentation map of 256x128 pixels
136
+ ##########################################################################################
137
+ #
138
+ @register_model
139
+ def segmentation_ffnet122NS_CBB_mobile():
140
+ return create_ffnet(
141
+ ffnet_head_type="B_mobile",
142
+ task="segmentation_B",
143
+ num_classes=19,
144
+ model_name="ffnnet122NS_CBB_mobile",
145
+ backbone=resnet.Resnet122NS,
146
+ pre_downsampling=False,
147
+ pretrained_weights_path=os.path.join(
148
+ model_weights_base_path,
149
+ "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth",
150
+ ),
151
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
152
+ )
153
+
154
+
155
+ @register_model
156
+ def segmentation_ffnet74NS_CBB_mobile():
157
+ return create_ffnet(
158
+ ffnet_head_type="B_mobile",
159
+ task="segmentation_B",
160
+ num_classes=19,
161
+ model_name="ffnnet74NS_CBB_mobile",
162
+ backbone=resnet.Resnet74NS,
163
+ pre_downsampling=False,
164
+ pretrained_weights_path=os.path.join(
165
+ model_weights_base_path,
166
+ "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth",
167
+ ),
168
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
169
+ )
170
+
171
+
172
+ @register_model
173
+ def segmentation_ffnet46NS_CBB_mobile():
174
+ return create_ffnet(
175
+ ffnet_head_type="B_mobile",
176
+ task="segmentation_B",
177
+ num_classes=19,
178
+ model_name="ffnnet46NS_CBB_mobile",
179
+ backbone=resnet.Resnet46NS,
180
+ pre_downsampling=False,
181
+ pretrained_weights_path=os.path.join(
182
+ model_weights_base_path,
183
+ "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth",
184
+ ),
185
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
186
+ )
187
+
188
+
189
+ @register_model
190
+ def segmentation_ffnet122NS_CCC_mobile():
191
+ return create_ffnet(
192
+ ffnet_head_type="C_mobile",
193
+ task="segmentation_C",
194
+ num_classes=19,
195
+ model_name="ffnnet122NS_CCC_mobile",
196
+ backbone=resnet.Resnet122NS,
197
+ pre_downsampling=False,
198
+ pretrained_weights_path=os.path.join(
199
+ model_weights_base_path,
200
+ "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth",
201
+ ),
202
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
203
+ )
204
+
205
+
206
+ @register_model
207
+ def segmentation_ffnet74NS_CCC_mobile():
208
+ return create_ffnet(
209
+ ffnet_head_type="C_mobile",
210
+ task="segmentation_C",
211
+ num_classes=19,
212
+ model_name="ffnnet74NS_CCC_mobile",
213
+ backbone=resnet.Resnet74NS,
214
+ pre_downsampling=False,
215
+ pretrained_weights_path=os.path.join(
216
+ model_weights_base_path,
217
+ "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth",
218
+ ),
219
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
220
+ )
221
+
222
+
223
+ @register_model
224
+ def segmentation_ffnet46NS_CCC_mobile():
225
+ return create_ffnet(
226
+ ffnet_head_type="C_mobile",
227
+ task="segmentation_C",
228
+ num_classes=19,
229
+ model_name="ffnnet46NS_CCC_mobile",
230
+ backbone=resnet.Resnet46NS,
231
+ pre_downsampling=False,
232
+ pretrained_weights_path=os.path.join(
233
+ model_weights_base_path,
234
+ "ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth",
235
+ ),
236
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
237
+ )
238
+
239
+
240
+ ##########################################################################################
241
+ ##### Classification models with an FFNet structure. Primarily intended for imagenet
242
+ ##### initialization of FFNet.
243
+ ##### See the README for the hyperparameters for training the classification models
244
+ ##########################################################################################
245
+ @register_model
246
+ def classification_ffnet122NS_CBX_mobile():
247
+ return create_ffnet(
248
+ ffnet_head_type="B_mobile",
249
+ task="classification",
250
+ num_classes=1000,
251
+ model_name="ffnnet122NS_CBX_mobile",
252
+ backbone=resnet.Resnet122NS,
253
+ pretrained_weights_path=os.path.join(
254
+ model_weights_base_path,
255
+ "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth",
256
+ ),
257
+ pretrained_backbone_only=False,
258
+ strict_loading=True,
259
+ dropout_rate=0.2,
260
+ )
261
+
262
+
263
+ @register_model
264
+ def classification_ffnet74NS_CBX_mobile():
265
+ return create_ffnet(
266
+ ffnet_head_type="B_mobile",
267
+ task="classification",
268
+ num_classes=1000,
269
+ model_name="ffnnet74NS_CBX_mobile",
270
+ backbone=resnet.Resnet74NS,
271
+ pretrained_weights_path=os.path.join(
272
+ model_weights_base_path,
273
+ "ffnet74NS/ffnet74NS_CBX_imagenet_state_dict_quarts.pth",
274
+ ),
275
+ pretrained_backbone_only=False,
276
+ strict_loading=True,
277
+ dropout_rate=0.2,
278
+ )
279
+
280
+
281
+ @register_model
282
+ def classification_ffnet46NS_CBX_mobile():
283
+ return create_ffnet(
284
+ ffnet_head_type="B_mobile",
285
+ task="classification",
286
+ num_classes=1000,
287
+ model_name="ffnnet46NS_CBX_mobile",
288
+ backbone=resnet.Resnet46NS,
289
+ pretrained_weights_path=os.path.join(
290
+ model_weights_base_path,
291
+ "ffnet46NS/ffnet46NS_CBX_imagenet_state_dict_quarts.pth",
292
+ ),
293
+ pretrained_backbone_only=False,
294
+ strict_loading=True,
295
+ dropout_rate=0.2,
296
+ )
297
+
298
+
299
+ ##########################################################################################
300
+ ##### This is an example of how these FFNet models, which are intended for 1024x512 images
301
+ ##### would be initialized for training on cityscapes with 2048x1024 images
302
+ ##########################################################################################
303
+ @register_model
304
+ def segmentation_ffnet122NS_CBB_mobile_pre_down_train():
305
+ return create_ffnet(
306
+ ffnet_head_type="B_mobile",
307
+ task="segmentation_B",
308
+ num_classes=19,
309
+ model_name="ffnnet122NS_CBB_mobile_pre_down",
310
+ backbone=resnet.Resnet122NS,
311
+ pre_downsampling=True,
312
+ pretrained_weights_path=os.path.join(
313
+ model_weights_base_path,
314
+ "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth",
315
+ ),
316
+ pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
317
+ strict_loading=False, # Strict loading is false here because the weights are going into a model with pre_downsampling=True
318
+ )
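The *_pre_down factories above fold the initial 2x downsampling into the model so that the standard Cityscapes evaluation script can feed them 2048x1024 frames, whereas the plain *_mobile variants expect a 1024x512 image directly. A minimal sketch of that difference, assuming the referenced checkpoints exist under model_weights_base_path:

import torch
import torch.nn.functional as F
from models import (
    segmentation_ffnet74NS_CBB_mobile_pre_down,
    segmentation_ffnet74NS_CBB_mobile,
)

frame = torch.randn(1, 3, 1024, 2048)  # stand-in for a full-resolution Cityscapes frame

# pre_down variant: accepts the 2048x1024 frame and downsamples internally
pre_down = segmentation_ffnet74NS_CBB_mobile_pre_down().eval()
with torch.no_grad():
    logits_a = pre_down(frame)

# plain mobile variant: the caller halves the resolution to 1024x512 first
plain = segmentation_ffnet74NS_CBB_mobile().eval()
with torch.no_grad():
    logits_b = plain(F.interpolate(frame, scale_factor=0.5, mode="bilinear", align_corners=False))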
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py ADDED
@@ -0,0 +1,157 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ import os
5
+ from functools import partial
6
+
7
+ import torch
8
+
9
+
10
+ from models import resnet
11
+
12
+ import os
13
+ import sys
14
+ import numpy as np
15
+
16
+ import torch.nn as nn
17
+ import torch._utils
18
+ import torch.nn.functional as F
19
+
20
+ from models.ffnet_blocks import create_ffnet
21
+ from models.model_registry import register_model
22
+ from config import model_weights_base_path
23
+
24
+
25
+ ##########################################################################################
26
+ ##### 3-Stage GPU FFNets. These are trained for use with image sizes of 2048x1024 and
27
+ ##### output segmentation maps of size 512x256 pixels
28
+ ##########################################################################################
29
+ @register_model
30
+ def segmentation_ffnet122N_CBB():
31
+ return create_ffnet(
32
+ ffnet_head_type="B",
33
+ task="segmentation_B",
34
+ num_classes=19,
35
+ model_name="ffnnet122N_CBB",
36
+ backbone=resnet.Resnet122N,
37
+ pre_downsampling=False,
38
+ pretrained_weights_path=os.path.join(
39
+ model_weights_base_path,
40
+ "ffnet122N/ffnet122N_CBB_cityscapes_state_dict_quarts.pth",
41
+ ),
42
+ strict_loading=True,
43
+ )
44
+
45
+
46
+ @register_model
47
+ def segmentation_ffnet74N_CBB():
48
+ return create_ffnet(
49
+ ffnet_head_type="B",
50
+ task="segmentation_B",
51
+ num_classes=19,
52
+ model_name="ffnnet74N_CBB",
53
+ backbone=resnet.Resnet74N,
54
+ pre_downsampling=False,
55
+ pretrained_weights_path=os.path.join(
56
+ model_weights_base_path,
57
+ "ffnet74N/ffnet74N_CBB_cityscapes_state_dict_quarts.pth",
58
+ ),
59
+ strict_loading=True,
60
+ )
61
+
62
+
63
+ @register_model
64
+ def segmentation_ffnet46N_CBB():
65
+ return create_ffnet(
66
+ ffnet_head_type="B",
67
+ task="segmentation_B",
68
+ num_classes=19,
69
+ model_name="ffnnet46N_CBB",
70
+ backbone=resnet.Resnet46N,
71
+ pre_downsampling=False,
72
+ pretrained_weights_path=os.path.join(
73
+ model_weights_base_path,
74
+ "ffnet46N/ffnet46N_CBB_cityscapes_state_dict_quarts.pth",
75
+ ),
76
+ strict_loading=True,
77
+ )
78
+
79
+
80
+ ##########################################################################################
81
+ ##### Classification models with an FFNet structure. Primarily intended for imagenet
82
+ ##### initialization of FFNet.
83
+ ##### See the README for the hyperparameters for training the classification models
84
+ ##########################################################################################
85
+ @register_model
86
+ def classification_ffnet122N_CBX():
87
+ return create_ffnet(
88
+ ffnet_head_type="B",
89
+ task="classification",
90
+ num_classes=1000,
91
+ model_name="ffnnet122N_CBX",
92
+ backbone=resnet.Resnet122N,
93
+ pretrained_weights_path=os.path.join(
94
+ model_weights_base_path,
95
+ "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth",
96
+ ),
97
+ pretrained_backbone_only=False,
98
+ strict_loading=True,
99
+ dropout_rate=0.2,
100
+ )
101
+
102
+
103
+ @register_model
104
+ def classification_ffnet74N_CBX():
105
+ return create_ffnet(
106
+ ffnet_head_type="B",
107
+ task="classification",
108
+ num_classes=1000,
109
+ model_name="ffnnet74N_CBX",
110
+ backbone=resnet.Resnet74N,
111
+ pretrained_weights_path=os.path.join(
112
+ model_weights_base_path,
113
+ "ffnet74N/ffnet74N_CBX_imagenet_state_dict_quarts.pth",
114
+ ),
115
+ pretrained_backbone_only=False,
116
+ strict_loading=True,
117
+ dropout_rate=0.2,
118
+ )
119
+
120
+
121
+ @register_model
122
+ def classification_ffnet46N_CBX():
123
+ return create_ffnet(
124
+ ffnet_head_type="B",
125
+ task="classification",
126
+ num_classes=1000,
127
+ model_name="ffnnet46N_CBX",
128
+ backbone=resnet.Resnet46N,
129
+ pretrained_weights_path=os.path.join(
130
+ model_weights_base_path,
131
+ "ffnet46N/ffnet46N_CBX_imagenet_state_dict_quarts.pth",
132
+ ),
133
+ pretrained_backbone_only=False,
134
+ strict_loading=True,
135
+ dropout_rate=0.2,
136
+ )
137
+
138
+
139
+ ##########################################################################################
140
+ ##### This is an example of how these FFNet models would be initialized for training on
141
+ ##### cityscapes with 2048x1024 images
142
+ ##########################################################################################
143
+ @register_model
144
+ def segmentation_ffnet122N_CBB_train():
145
+ return create_ffnet(
146
+ ffnet_head_type="B",
147
+ task="segmentation_B",
148
+ num_classes=19,
149
+ model_name="ffnnet122N_CBB",
150
+ backbone=resnet.Resnet122N,
151
+ pretrained_weights_path=os.path.join(
152
+ model_weights_base_path,
153
+ "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth",
154
+ ),
155
+ pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
156
+ strict_loading=False,
157
+ )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py ADDED
@@ -0,0 +1,80 @@
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
+ # All Rights Reserved.
+
+ import os
+ from functools import partial
+
+ import torch
+
+
+ from models import resnet
+
+ import os
+ import sys
+ import numpy as np
+
+ import torch.nn as nn
+ import torch._utils
+ import torch.nn.functional as F
+
+ from models.ffnet_blocks import create_ffnet
+ from models.model_registry import register_model
+ from config import model_weights_base_path
+
+
+ ##########################################################################################
+ ##### 4-Stage GPU FFNets with Slim backbone.
+ ##### These are trained for use with image sizes of 2048x1024
+ ##### and output a segmentation map of 512x256 pixels
+ ##########################################################################################
+ @register_model
+ def segmentation_ffnet150S_BBB():
+     return create_ffnet(
+         ffnet_head_type="B",
+         task="segmentation_B",
+         num_classes=19,
+         model_name="ffnnet150S_BBB",
+         backbone=resnet.Resnet150S,
+         pretrained_weights_path=os.path.join(
+             model_weights_base_path,
+             "ffnet150S/ffnet150S_BBB_gpu_cityscapes_state_dict_quarts.pth",
+         ),
+         strict_loading=True,
+     )
+
+
+ @register_model
+ def segmentation_ffnet86S_BBB():
+     return create_ffnet(
+         ffnet_head_type="B",
+         task="segmentation_B",
+         num_classes=19,
+         model_name="ffnnet86S_BBB",
+         backbone=resnet.Resnet86S,
+         pretrained_weights_path=os.path.join(
+             model_weights_base_path,
+             "ffnet86S/ffnet86S_BBB_gpu_cityscapes_state_dict_quarts.pth",
+         ),
+         strict_loading=True,
+     )
+
+
+ ##########################################################################################
+ ##### This is an example of how these FFNet models would be initialized for training on
+ ##### cityscapes with 2048x1024 images
+ ##########################################################################################
+ @register_model
+ def segmentation_ffnet86S_BBB_train():
+     return create_ffnet(
+         ffnet_head_type="B",
+         task="segmentation_B",
+         num_classes=19,
+         model_name="ffnnet86S_BBB",
+         backbone=resnet.Resnet86S,
+         pretrained_weights_path=os.path.join(
+             model_weights_base_path,
+             "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth",
+         ),
+         pretrained_backbone_only=True,  # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
+         strict_loading=False,
+     )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py ADDED
@@ -0,0 +1,119 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ import os
5
+ from functools import partial
6
+
7
+ import torch
8
+
9
+
10
+ from models import resnet
11
+
12
+ import os
13
+ import sys
14
+ import numpy as np
15
+
16
+ import torch.nn as nn
17
+ import torch._utils
18
+ import torch.nn.functional as F
19
+
20
+ from models.ffnet_blocks import create_ffnet
21
+ from models.model_registry import register_model
22
+ from config import model_weights_base_path
23
+
24
+
25
+ ##########################################################################################
26
+ ##### 4-Stage GPU FFNets with Slim backbone.
27
+ ##### These are trained for use with image sizes of 2048x1024
28
+ ##### and output a segmentation map of 256x128 pixels
29
+ ##########################################################################################
30
+ @register_model
31
+ def segmentation_ffnet150S_dBBB():
32
+ return create_ffnet(
33
+ ffnet_head_type="B",
34
+ task="segmentation_B",
35
+ num_classes=19,
36
+ model_name="ffnnet150S_dBBB",
37
+ backbone=resnet.Resnet150S_D,
38
+ pre_downsampling=False,
39
+ pretrained_weights_path=os.path.join(
40
+ model_weights_base_path,
41
+ "ffnet150S/ffnet150S_dBBB_gpu_cityscapes_state_dict_quarts.pth",
42
+ ),
43
+ strict_loading=True,
44
+ )
45
+
46
+
47
+ @register_model
48
+ def segmentation_ffnet86S_dBBB():
49
+ return create_ffnet(
50
+ ffnet_head_type="B",
51
+ task="segmentation_B",
52
+ num_classes=19,
53
+ model_name="ffnnet86S_dBBB",
54
+ backbone=resnet.Resnet86S_D,
55
+ pre_downsampling=False,
56
+ pretrained_weights_path=os.path.join(
57
+ model_weights_base_path,
58
+ "ffnet86S/ffnet86S_dBBB_gpu_cityscapes_state_dict_quarts.pth",
59
+ ),
60
+ strict_loading=True,
61
+ )
62
+
63
+
64
+ ##########################################################################################
65
+ ##### Classification models with an FFNet structure. Primarily intended for imagenet
66
+ ##### initialization of FFNet.
67
+ ##### See the README for the hyperparameters for training the classification models
68
+ ##########################################################################################
69
+ @register_model
70
+ def classification_ffnet150S_BBX():
71
+ return create_ffnet(
72
+ ffnet_head_type="B",
73
+ task="classification",
74
+ num_classes=1000,
75
+ model_name="ffnnet150S_BBX",
76
+ backbone=resnet.Resnet150S,
77
+ pretrained_weights_path=os.path.join(
78
+ model_weights_base_path,
79
+ "ffnet150S/ffnet150S_BBX_gpu_imagenet_state_dict_quarts.pth",
80
+ ),
81
+ strict_loading=True,
82
+ )
83
+
84
+
85
+ @register_model
86
+ def classification_ffnet86S_BBX():
87
+ return create_ffnet(
88
+ ffnet_head_type="B",
89
+ task="classification",
90
+ num_classes=1000,
91
+ model_name="ffnnet86S_BBX",
92
+ backbone=resnet.Resnet86S,
93
+ pretrained_weights_path=os.path.join(
94
+ model_weights_base_path,
95
+ "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth",
96
+ ),
97
+ strict_loading=True,
98
+ )
99
+
100
+
101
+ ##########################################################################################
102
+ ##### This is an example of how these FFNet models would be initialized for training on
103
+ ##### cityscapes with 2048x1024 images
104
+ ##########################################################################################
105
+ @register_model
106
+ def segmentation_ffnet86S_dBBB_train():
107
+ return create_ffnet(
108
+ ffnet_head_type="B",
109
+ task="segmentation_B",
110
+ num_classes=19,
111
+ model_name="ffnnet86S_dBBB",
112
+ backbone=resnet.Resnet86S_D,
113
+ pretrained_weights_path=os.path.join(
114
+ model_weights_base_path,
115
+ "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth",
116
+ ),
117
+ pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
118
+ strict_loading=False,
119
+ )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py ADDED
@@ -0,0 +1,555 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ import os
5
+ from functools import partial
6
+
7
+ import torch
8
+
9
+
10
+ from models import resnet
11
+
12
+ import os
13
+ import sys
14
+ import numpy as np
15
+
16
+ import torch.nn as nn
17
+ import torch._utils
18
+ import torch.nn.functional as F
19
+
20
+ from models.ffnet_blocks import create_ffnet
21
+ from models.model_registry import register_model
22
+ from config import model_weights_base_path
23
+
24
+
25
+ ##########################################################################################
26
+ ##### 4-Stage Mobile FFNets with Slim backbone.
27
+ ##### These are trained for use with image sizes of 2048x1024, and output a segmentation map
28
+ ##### of 256x128 pixels
29
+ ##########################################################################################
30
+ @register_model
31
+ def segmentation_ffnet86S_dBBB_mobile():
32
+ return create_ffnet(
33
+ ffnet_head_type="B_mobile",
34
+ task="segmentation_B",
35
+ num_classes=19,
36
+ model_name="ffnnet86S_dBBB_mobile",
37
+ backbone=resnet.Resnet86S_D,
38
+ pre_downsampling=False,
39
+ pretrained_weights_path=os.path.join(
40
+ model_weights_base_path,
41
+ "ffnet86S/ffnet86S_dBBB_cityscapes_state_dict_quarts.pth",
42
+ ),
43
+ strict_loading=True,
44
+ )
45
+
46
+
47
+ @register_model
48
+ def segmentation_ffnet78S_dBBB_mobile():
49
+ return create_ffnet(
50
+ ffnet_head_type="B_mobile",
51
+ task="segmentation_B",
52
+ num_classes=19,
53
+ model_name="ffnnet78S_dBBB_mobile",
54
+ backbone=resnet.Resnet78S_D,
55
+ pre_downsampling=False,
56
+ pretrained_weights_path=os.path.join(
57
+ model_weights_base_path,
58
+ "ffnet78S/ffnet78S_dBBB_cityscapes_state_dict_quarts.pth",
59
+ ),
60
+ strict_loading=True,
61
+ )
62
+
63
+
64
+ @register_model
65
+ def segmentation_ffnet54S_dBBB_mobile():
66
+ return create_ffnet(
67
+ ffnet_head_type="B_mobile",
68
+ task="segmentation_B",
69
+ num_classes=19,
70
+ model_name="ffnnet54S_dBBB_mobile",
71
+ backbone=resnet.Resnet54S_D,
72
+ pre_downsampling=False,
73
+ pretrained_weights_path=os.path.join(
74
+ model_weights_base_path,
75
+ "ffnet54S/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth",
76
+ ),
77
+ strict_loading=True,
78
+ )
79
+
80
+
81
+ @register_model
82
+ def segmentation_ffnet40S_dBBB_mobile():
83
+ return create_ffnet(
84
+ ffnet_head_type="B_mobile",
85
+ task="segmentation_B",
86
+ num_classes=19,
87
+ model_name="ffnnet40S_dBBB_mobile",
88
+ backbone=resnet.Resnet40S_D,
89
+ pre_downsampling=False,
90
+ pretrained_weights_path=os.path.join(
91
+ model_weights_base_path,
92
+ "ffnet40S/ffnet40S_dBBB_cityscapes_state_dict_quarts.pth",
93
+ ),
94
+ strict_loading=True,
95
+ )
96
+
97
+
98
+ ##########################################################################################
99
+ ##### 4-Stage Mobile FFNets with Slim backbone, trained for use with image sizes of 1024x512
100
+ ##### and output a segmentation map of 256x128 pixels
101
+ ##### These versions are meant for use with the cityscapes evaluation script, which provides
102
+ ##### inputs at 2048x1024
103
+ ##########################################################################################
104
+ @register_model
105
+ def segmentation_ffnet150S_BBB_mobile_pre_down():
106
+ return create_ffnet(
107
+ ffnet_head_type="B_mobile",
108
+ task="segmentation_B",
109
+ num_classes=19,
110
+ model_name="ffnnet150S_BBB_mobile_pre_down",
111
+ backbone=resnet.Resnet150S,
112
+ pre_downsampling=True,
113
+ pretrained_weights_path=os.path.join(
114
+ model_weights_base_path,
115
+ "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth",
116
+ ),
117
+ strict_loading=True,
118
+ )
119
+
120
+
121
+ @register_model
122
+ def segmentation_ffnet86S_BBB_mobile_pre_down():
123
+ return create_ffnet(
124
+ ffnet_head_type="B_mobile",
125
+ task="segmentation_B",
126
+ num_classes=19,
127
+ model_name="ffnnet86S_BBB_mobile_pre_down",
128
+ backbone=resnet.Resnet86S,
129
+ pre_downsampling=True,
130
+ pretrained_weights_path=os.path.join(
131
+ model_weights_base_path,
132
+ "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth",
133
+ ),
134
+ strict_loading=True,
135
+ )
136
+
137
+
138
+ @register_model
139
+ def segmentation_ffnet78S_BBB_mobile_pre_down():
140
+ return create_ffnet(
141
+ ffnet_head_type="B_mobile",
142
+ task="segmentation_B",
143
+ num_classes=19,
144
+ model_name="ffnnet78S_BBB_mobile_pre_down",
145
+ backbone=resnet.Resnet78S,
146
+ pre_downsampling=True,
147
+ pretrained_weights_path=os.path.join(
148
+ model_weights_base_path,
149
+ "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth",
150
+ ),
151
+ strict_loading=True,
152
+ )
153
+
154
+
155
+ @register_model
156
+ def segmentation_ffnet54S_BBB_mobile_pre_down():
157
+ return create_ffnet(
158
+ ffnet_head_type="B_mobile",
159
+ task="segmentation_B",
160
+ num_classes=19,
161
+ model_name="ffnnet54S_BBB_mobile_pre_down",
162
+ backbone=resnet.Resnet54S,
163
+ pre_downsampling=True,
164
+ pretrained_weights_path=os.path.join(
165
+ model_weights_base_path,
166
+ "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth",
167
+ ),
168
+ strict_loading=True,
169
+ )
170
+
171
+
172
+ @register_model
173
+ def segmentation_ffnet40S_BBB_mobile_pre_down():
174
+ return create_ffnet(
175
+ ffnet_head_type="B_mobile",
176
+ task="segmentation_B",
177
+ num_classes=19,
178
+ model_name="ffnnet40S_BBB_mobile_pre_down",
179
+ backbone=resnet.Resnet40S,
180
+ pre_downsampling=True,
181
+ pretrained_weights_path=os.path.join(
182
+ model_weights_base_path,
183
+ "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth",
184
+ ),
185
+ strict_loading=True,
186
+ )
187
+
188
+
189
+ @register_model
190
+ def segmentation_ffnet150S_BCC_mobile_pre_down():
191
+ return create_ffnet(
192
+ ffnet_head_type="C_mobile",
193
+ task="segmentation_C",
194
+ num_classes=19,
195
+ model_name="ffnnet150S_BCC_mobile_pre_down",
196
+ backbone=resnet.Resnet150S,
197
+ pre_downsampling=True,
198
+ pretrained_weights_path=os.path.join(
199
+ model_weights_base_path,
200
+ "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth",
201
+ ),
202
+ strict_loading=True,
203
+ )
204
+
205
+
206
+ @register_model
207
+ def segmentation_ffnet86S_BCC_mobile_pre_down():
208
+ return create_ffnet(
209
+ ffnet_head_type="C_mobile",
210
+ task="segmentation_C",
211
+ num_classes=19,
212
+ model_name="ffnnet86S_BCC_mobile_pre_down",
213
+ backbone=resnet.Resnet86S,
214
+ pre_downsampling=True,
215
+ pretrained_weights_path=os.path.join(
216
+ model_weights_base_path,
217
+ "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth",
218
+ ),
219
+ strict_loading=True,
220
+ )
221
+
222
+
223
+ @register_model
224
+ def segmentation_ffnet78S_BCC_mobile_pre_down():
225
+ return create_ffnet(
226
+ ffnet_head_type="C_mobile",
227
+ task="segmentation_C",
228
+ num_classes=19,
229
+ model_name="ffnnet78S_BCC_mobile_pre_down",
230
+ backbone=resnet.Resnet78S,
231
+ pre_downsampling=True,
232
+ pretrained_weights_path=os.path.join(
233
+ model_weights_base_path,
234
+ "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth",
235
+ ),
236
+ strict_loading=True,
237
+ )
238
+
239
+
240
+ @register_model
241
+ def segmentation_ffnet54S_BCC_mobile_pre_down():
242
+ return create_ffnet(
243
+ ffnet_head_type="C_mobile",
244
+ task="segmentation_C",
245
+ num_classes=19,
246
+ model_name="ffnnet54S_BCC_mobile_pre_down",
247
+ backbone=resnet.Resnet54S,
248
+ pre_downsampling=True,
249
+ pretrained_weights_path=os.path.join(
250
+ model_weights_base_path,
251
+ "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth",
252
+ ),
253
+ strict_loading=True,
254
+ )
255
+
256
+
257
+ @register_model
258
+ def segmentation_ffnet40S_BCC_mobile_pre_down():
259
+ return create_ffnet(
260
+ ffnet_head_type="C_mobile",
261
+ task="segmentation_C",
262
+ num_classes=19,
263
+ model_name="ffnnet40S_BCC_mobile_pre_down",
264
+ backbone=resnet.Resnet40S,
265
+ pre_downsampling=True,
266
+ pretrained_weights_path=os.path.join(
267
+ model_weights_base_path,
268
+ "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth",
269
+ ),
270
+ strict_loading=True,
271
+ )
272
+
273
+
274
+ ##########################################################################################
275
+ ##### 4-Stage Mobile FFNets with Slim backbone.
276
+ ##### These are the actual models, trained for use with image sizes of 1024x512
277
+ ##### and output a segmentation map of 256x128 pixels
278
+ ##### See the versions with _pre_down suffix for models to use with the cityscapes evaluation script
279
+ ##########################################################################################
280
+ @register_model
281
+ def segmentation_ffnet150S_BBB_mobile():
282
+ return create_ffnet(
283
+ ffnet_head_type="B_mobile",
284
+ task="segmentation_B",
285
+ num_classes=19,
286
+ model_name="ffnnet150S_BBB_mobile",
287
+ backbone=resnet.Resnet150S,
288
+ pre_downsampling=False,
289
+ pretrained_weights_path=os.path.join(
290
+ model_weights_base_path,
291
+ "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth",
292
+ ),
293
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
294
+ )
295
+
296
+
297
+ @register_model
298
+ def segmentation_ffnet86S_BBB_mobile():
299
+ return create_ffnet(
300
+ ffnet_head_type="B_mobile",
301
+ task="segmentation_B",
302
+ num_classes=19,
303
+ model_name="ffnnet86S_BBB_mobile",
304
+ backbone=resnet.Resnet86S,
305
+ pre_downsampling=False,
306
+ pretrained_weights_path=os.path.join(
307
+ model_weights_base_path,
308
+ "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth",
309
+ ),
310
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
311
+ )
312
+
313
+
314
+ @register_model
315
+ def segmentation_ffnet78S_BBB_mobile():
316
+ return create_ffnet(
317
+ ffnet_head_type="B_mobile",
318
+ task="segmentation_B",
319
+ num_classes=19,
320
+ model_name="ffnnet78S_BBB_mobile",
321
+ backbone=resnet.Resnet78S,
322
+ pre_downsampling=False,
323
+ pretrained_weights_path=os.path.join(
324
+ model_weights_base_path,
325
+ "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth",
326
+ ),
327
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
328
+ )
329
+
330
+
331
+ @register_model
332
+ def segmentation_ffnet54S_BBB_mobile():
333
+ return create_ffnet(
334
+ ffnet_head_type="B_mobile",
335
+ task="segmentation_B",
336
+ num_classes=19,
337
+ model_name="ffnnet54S_BBB_mobile",
338
+ backbone=resnet.Resnet54S,
339
+ pre_downsampling=False,
340
+ pretrained_weights_path=os.path.join(
341
+ model_weights_base_path,
342
+ "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth",
343
+ ),
344
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
345
+ )
346
+
347
+
348
+ @register_model
349
+ def segmentation_ffnet40S_BBB_mobile():
350
+ return create_ffnet(
351
+ ffnet_head_type="B_mobile",
352
+ task="segmentation_B",
353
+ num_classes=19,
354
+ model_name="ffnnet40S_BBB_mobile",
355
+ backbone=resnet.Resnet40S,
356
+ pre_downsampling=False,
357
+ pretrained_weights_path=os.path.join(
358
+ model_weights_base_path,
359
+ "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth",
360
+ ),
361
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
362
+ )
363
+
364
+
365
+ @register_model
366
+ def segmentation_ffnet150S_BCC_mobile():
367
+ return create_ffnet(
368
+ ffnet_head_type="C_mobile",
369
+ task="segmentation_C",
370
+ num_classes=19,
371
+ model_name="ffnnet150S_BCC_mobile",
372
+ backbone=resnet.Resnet150S,
373
+ pre_downsampling=False,
374
+ pretrained_weights_path=os.path.join(
375
+ model_weights_base_path,
376
+ "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth",
377
+ ),
378
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
379
+ )
380
+
381
+
382
+ @register_model
383
+ def segmentation_ffnet86S_BCC_mobile():
384
+ return create_ffnet(
385
+ ffnet_head_type="C_mobile",
386
+ task="segmentation_C",
387
+ num_classes=19,
388
+ model_name="ffnnet86S_BCC_mobile",
389
+ backbone=resnet.Resnet86S,
390
+ pre_downsampling=False,
391
+ pretrained_weights_path=os.path.join(
392
+ model_weights_base_path,
393
+ "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth",
394
+ ),
395
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
396
+ )
397
+
398
+
399
+ @register_model
400
+ def segmentation_ffnet78S_BCC_mobile():
401
+ return create_ffnet(
402
+ ffnet_head_type="C_mobile",
403
+ task="segmentation_C",
404
+ num_classes=19,
405
+ model_name="ffnnet78S_BCC_mobile",
406
+ backbone=resnet.Resnet78S,
407
+ pre_downsampling=False,
408
+ pretrained_weights_path=os.path.join(
409
+ model_weights_base_path,
410
+ "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth",
411
+ ),
412
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
413
+ )
414
+
415
+
416
+ @register_model
417
+ def segmentation_ffnet54S_BCC_mobile():
418
+ return create_ffnet(
419
+ ffnet_head_type="C_mobile",
420
+ task="segmentation_C",
421
+ num_classes=19,
422
+ model_name="ffnnet54S_BCC_mobile",
423
+ backbone=resnet.Resnet54S,
424
+ pre_downsampling=False,
425
+ pretrained_weights_path=os.path.join(
426
+ model_weights_base_path,
427
+ "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth",
428
+ ),
429
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
430
+ )
431
+
432
+
433
+ @register_model
434
+ def segmentation_ffnet40S_BCC_mobile():
435
+ return create_ffnet(
436
+ ffnet_head_type="C_mobile",
437
+ task="segmentation_C",
438
+ num_classes=19,
439
+ model_name="ffnnet40S_BCC_mobile",
440
+ backbone=resnet.Resnet40S,
441
+ pre_downsampling=False,
442
+ pretrained_weights_path=os.path.join(
443
+ model_weights_base_path,
444
+ "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth",
445
+ ),
446
+ strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True
447
+ )
448
+
449
+
450
+ ##########################################################################################
451
+ ##### Classification models with an FFNet structure. Primarily intended for imagenet
452
+ ##### initialization of FFNet.
453
+ ##### See the README for the hyperparameters for training the classification models
454
+ ##########################################################################################
455
+ @register_model
456
+ def classification_ffnet150S_BBX_mobile():
457
+ return create_ffnet(
458
+ ffnet_head_type="B_mobile",
459
+ task="classification",
460
+ num_classes=1000,
461
+ model_name="ffnnet150S_BBX_mobile",
462
+ backbone=resnet.Resnet150S,
463
+ pretrained_weights_path=os.path.join(
464
+ model_weights_base_path,
465
+ "ffnet150S/ffnet150S_BBX_imagenet_state_dict_quarts.pth",
466
+ ),
467
+ strict_loading=True,
468
+ )
469
+
470
+
471
+ @register_model
472
+ def classification_ffnet86S_BBX_mobile():
473
+ return create_ffnet(
474
+ ffnet_head_type="B_mobile",
475
+ task="classification",
476
+ num_classes=1000,
477
+ model_name="ffnnet86S_BBX_mobile",
478
+ backbone=resnet.Resnet86S,
479
+ pretrained_weights_path=os.path.join(
480
+ model_weights_base_path,
481
+ "ffnet86S/ffnet86S_BBX_imagenet_state_dict_quarts.pth",
482
+ ),
483
+ strict_loading=True,
484
+ )
485
+
486
+
487
+ @register_model
488
+ def classification_ffnet78S_BBX_mobile():
489
+ return create_ffnet(
490
+ ffnet_head_type="B_mobile",
491
+ task="classification",
492
+ num_classes=1000,
493
+ model_name="ffnnet78S_BBX_mobile",
494
+ backbone=resnet.Resnet78S,
495
+ pretrained_weights_path=os.path.join(
496
+ model_weights_base_path,
497
+ "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth",
498
+ ),
499
+ strict_loading=True,
500
+ )
501
+
502
+
503
+ @register_model
504
+ def classification_ffnet54S_BBX_mobile():
505
+ return create_ffnet(
506
+ ffnet_head_type="B_mobile",
507
+ task="classification",
508
+ num_classes=1000,
509
+ model_name="ffnnet54S_BBX_mobile",
510
+ backbone=resnet.Resnet54S,
511
+ pretrained_weights_path=os.path.join(
512
+ model_weights_base_path,
513
+ "ffnet54S/ffnet54S_BBX_imagenet_state_dict_quarts.pth",
514
+ ),
515
+ strict_loading=True,
516
+ )
517
+
518
+
519
+ @register_model
520
+ def classification_ffnet40S_BBX_mobile():
521
+ return create_ffnet(
522
+ ffnet_head_type="B_mobile",
523
+ task="classification",
524
+ num_classes=1000,
525
+ model_name="ffnnet40S_BBX_mobile",
526
+ backbone=resnet.Resnet40S,
527
+ pretrained_weights_path=os.path.join(
528
+ model_weights_base_path,
529
+ "ffnet40S/ffnet40S_BBX_imagenet_state_dict_quarts.pth",
530
+ ),
531
+ strict_loading=True,
532
+ )
533
+
534
+
535
+ ##########################################################################################
536
+ ##### This is an example of how the FFNet models intended for 1024x512 images
537
+ ##### would be initialized for training on cityscapes with 2048x1024 images
538
+ ##### Set up the rest accordingly
539
+ ##########################################################################################
540
+ @register_model
541
+ def segmentation_ffnet78S_BCC_mobile_pre_down_train():
542
+ return create_ffnet(
543
+ ffnet_head_type="C_mobile",
544
+ task="segmentation_C",
545
+ num_classes=19,
546
+ model_name="ffnnet78S_BCC_mobile_pre_down",
547
+ backbone=resnet.Resnet78S,
548
+ pre_downsampling=True,
549
+ pretrained_weights_path=os.path.join(
550
+ model_weights_base_path,
551
+ "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth",
552
+ ),
553
+ pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
554
+ strict_loading=False, # Strict loading is false here because the weights are going into a model with pre_downsampling=True
555
+ )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py ADDED
@@ -0,0 +1,663 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ #########################################################################################
5
+ # Part of the code in UpBranch adapted from https://github.com/feinanshan/FANet/blob/master/Testing/models/fanet/fanet.py
6
+ #
7
+ # The original source code was made available under the following license
8
+ # MIT License
9
+ #
10
+ # Copyright (c) 2021 Ping Hu
11
+ #
12
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ # of this software and associated documentation files (the "Software"), to deal
14
+ # in the Software without restriction, including without limitation the rights
15
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ # copies of the Software, and to permit persons to whom the Software is
17
+ # furnished to do so, subject to the following conditions:
18
+ #
19
+ # The above copyright notice and this permission notice shall be included in all
20
+ # copies or substantial portions of the Software.
21
+ #
22
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ # SOFTWARE.
29
+
30
+
31
+ # Code for ClassificationHead adapted from https://github.com/HRNet/HRNet-Image-Classification
32
+
33
+ # The original source code was made available under the following license
34
+ # MIT License
35
+ # Copyright (c) 2019 Microsoft Corporation
36
+ #
37
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
38
+ # of this software and associated documentation files (the "Software"), to deal
39
+ # in the Software without restriction, including without limitation the rights
40
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
41
+ # copies of the Software, and to permit persons to whom the Software is
42
+ # furnished to do so, subject to the following conditions:
43
+ #
44
+ # The above copyright notice and this permission notice shall be included in all
45
+ # copies or substantial portions of the Software.
46
+ #
47
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
53
+ # SOFTWARE.
54
+ #########################################################################################
55
+
56
+
57
+ import math
58
+ import torch
59
+ from torch import nn
60
+ from torch.nn import functional as F
61
+ from models.utils import model_weight_initializer
62
+ import torchvision.transforms as T
63
+ from scipy import ndimage
64
+
65
+ # The modules here currently assume that there are always 4 branches.
66
+ # It would need to be adapted in order to support a variable number of branches
67
+
68
+ # TODO : Pass BN momentum through config
69
+ BN_MOMENTUM = 0.1
70
+ gpu_up_kwargs = {"mode": "bilinear", "align_corners": True}
71
+ mobile_up_kwargs = {"mode": "nearest"}
72
+ relu_inplace = True
73
+
74
+ # TODO : Replace functional interpolate operations with upsample modules
75
+
76
+
77
+ class ConvBNReLU(nn.Module):
78
+ def __init__(
79
+ self,
80
+ in_chan,
81
+ out_chan,
82
+ ks=3,
83
+ stride=1,
84
+ padding=1,
85
+ activation=nn.ReLU,
86
+ *args,
87
+ **kwargs,
88
+ ):
89
+ super(ConvBNReLU, self).__init__()
90
+ layers = [
91
+ nn.Conv2d(
92
+ in_chan,
93
+ out_chan,
94
+ kernel_size=ks,
95
+ stride=stride,
96
+ padding=padding,
97
+ bias=False,
98
+ ),
99
+ nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM),
100
+ ]
101
+ if activation:
102
+ layers.append(activation(inplace=relu_inplace))
103
+ self.layers = nn.Sequential(*layers)
104
+
105
+ def forward(self, x):
106
+ return self.layers(x)
107
+
108
+
109
+ class AdapterConv(nn.Module):
110
+ def __init__(
111
+ self, in_channels=[256, 512, 1024, 2048], out_channels=[64, 128, 256, 512]
112
+ ):
113
+ super(AdapterConv, self).__init__()
114
+ assert len(in_channels) == len(
115
+ out_channels
116
+ ), "Number of input and output branches should match"
117
+ self.adapter_conv = nn.ModuleList()
118
+
119
+ for k in range(len(in_channels)):
120
+ self.adapter_conv.append(
121
+ ConvBNReLU(in_channels[k], out_channels[k], ks=1, stride=1, padding=0),
122
+ )
123
+
124
+ def forward(self, x):
125
+ out = []
126
+ for k in range(len(self.adapter_conv)):
127
+ out.append(self.adapter_conv[k](x[k]))
128
+ return out
129
+
130
+
131
+ class UpsampleCat(nn.Module):
132
+ def __init__(self, upsample_kwargs=gpu_up_kwargs):
133
+ super(UpsampleCat, self).__init__()
134
+ self._up_kwargs = upsample_kwargs
135
+
136
+ def forward(self, x):
137
+ """Upsample and concatenate feature maps."""
138
+ assert isinstance(x, list) or isinstance(x, tuple)
139
+ # print(self._up_kwargs)
140
+ x0 = x[0]
141
+ _, _, H, W = x0.size()
142
+ for i in range(1, len(x)):
143
+ x0 = torch.cat([x0, F.interpolate(x[i], (H, W), **self._up_kwargs)], dim=1)
144
+ return x0
145
+
146
+
147
+ class UpBranch(nn.Module):
148
+ def __init__(
149
+ self,
150
+ in_channels=[64, 128, 256, 512],
151
+ out_channels=[128, 128, 128, 128],
152
+ upsample_kwargs=gpu_up_kwargs,
153
+ ):
154
+ super(UpBranch, self).__init__()
155
+
156
+ self._up_kwargs = upsample_kwargs
157
+
158
+ self.fam_32_sm = ConvBNReLU(
159
+ in_channels[3], out_channels[3], ks=3, stride=1, padding=1
160
+ )
161
+ self.fam_32_up = ConvBNReLU(
162
+ in_channels[3], in_channels[2], ks=1, stride=1, padding=0
163
+ )
164
+ self.fam_16_sm = ConvBNReLU(
165
+ in_channels[2], out_channels[2], ks=3, stride=1, padding=1
166
+ )
167
+ self.fam_16_up = ConvBNReLU(
168
+ in_channels[2], in_channels[1], ks=1, stride=1, padding=0
169
+ )
170
+ self.fam_8_sm = ConvBNReLU(
171
+ in_channels[1], out_channels[1], ks=3, stride=1, padding=1
172
+ )
173
+ self.fam_8_up = ConvBNReLU(
174
+ in_channels[1], in_channels[0], ks=1, stride=1, padding=0
175
+ )
176
+ self.fam_4 = ConvBNReLU(
177
+ in_channels[0], out_channels[0], ks=3, stride=1, padding=1
178
+ )
179
+
180
+ self.high_level_ch = sum(out_channels)
181
+ self.out_channels = out_channels
182
+
183
+ def forward(self, x):
184
+
185
+ feat4, feat8, feat16, feat32 = x
186
+
187
+ smfeat_32 = self.fam_32_sm(feat32)
188
+ upfeat_32 = self.fam_32_up(feat32)
189
+
190
+ _, _, H, W = feat16.size()
191
+ x = F.interpolate(upfeat_32, (H, W), **self._up_kwargs) + feat16
192
+ smfeat_16 = self.fam_16_sm(x)
193
+ upfeat_16 = self.fam_16_up(x)
194
+
195
+ _, _, H, W = feat8.size()
196
+ x = F.interpolate(upfeat_16, (H, W), **self._up_kwargs) + feat8
197
+ smfeat_8 = self.fam_8_sm(x)
198
+ upfeat_8 = self.fam_8_up(x)
199
+
200
+ _, _, H, W = feat4.size()
201
+ smfeat_4 = self.fam_4(
202
+ F.interpolate(upfeat_8, (H, W), **self._up_kwargs) + feat4
203
+ )
204
+
205
+ return smfeat_4, smfeat_8, smfeat_16, smfeat_32
206
+
207
+
208
+ class FFNetUpHead(nn.Module):
209
+ def __init__(
210
+ self,
211
+ in_chans,
212
+ use_adapter_conv=True,
213
+ head_type="B_mobile",
214
+ task="segmentation_A",
215
+ num_classes=19,
216
+ base_chans=[64, 128, 256, 512],
217
+ dropout_rate=None, # Only used for classification
218
+ *args,
219
+ **kwargs,
220
+ ):
221
+ super(FFNetUpHead, self).__init__()
222
+ layers = []
223
+ # base_chans = [64, 128, 128, 128]
224
+ if head_type.startswith("A"):
225
+ base_chans = [64, 128, 256, 512]
226
+ elif head_type.startswith("B"):
227
+ base_chans = [64, 128, 128, 256]
228
+ elif head_type.startswith("C"):
229
+ base_chans = [128, 128, 128, 128]
230
+
231
+ if use_adapter_conv:
232
+ layers.append(AdapterConv(in_chans, base_chans))
233
+ in_chans = base_chans[:]
234
+
235
+ if head_type == "A":
236
+ layers.append(UpBranch(in_chans))
237
+ elif head_type == "A_mobile":
238
+ layers.append(UpBranch(in_chans, upsample_kwargs=mobile_up_kwargs))
239
+ elif head_type == "B":
240
+ layers.append(UpBranch(in_chans, [96, 96, 64, 32]))
241
+ elif head_type == "B_mobile":
242
+ layers.append(
243
+ UpBranch(in_chans, [96, 96, 64, 32], upsample_kwargs=mobile_up_kwargs)
244
+ )
245
+ elif head_type == "C":
246
+ layers.append(UpBranch(in_chans, [128, 16, 16, 16]))
247
+ elif head_type == "C_mobile":
248
+ layers.append(
249
+ UpBranch(in_chans, [128, 16, 16, 16], upsample_kwargs=mobile_up_kwargs)
250
+ )
251
+ else:
252
+ raise ValueError(f"Unknown FFNetUpHead type {head_type}")
253
+
254
+ self.num_features = layers[-1].high_level_ch
255
+ self.num_multi_scale_features = layers[-1].out_channels
256
+
257
+ if task.startswith("segmentation"):
258
+ if "mobile" in head_type:
259
+ layers.append(UpsampleCat(mobile_up_kwargs))
260
+ else:
261
+ layers.append(UpsampleCat(gpu_up_kwargs))
262
+
263
+ # Gets single scale input
264
+ if "_C" in task:
265
+ mid_feat = 128
266
+ layers.append(
267
+ SegmentationHead_NoSigmoid_1x1(
268
+ self.num_features,
269
+ mid_feat,
270
+ num_outputs=num_classes,
271
+ )
272
+ )
273
+ elif "_B" in task:
274
+ mid_feat = 256
275
+ layers.append(
276
+ SegmentationHead_NoSigmoid_3x3(
277
+ self.num_features,
278
+ mid_feat,
279
+ num_outputs=num_classes,
280
+ )
281
+ )
282
+ elif "_A" in task:
283
+ mid_feat = 512
284
+ layers.append(
285
+ SegmentationHead_NoSigmoid_1x1(
286
+ self.num_features,
287
+ mid_feat,
288
+ num_outputs=num_classes,
289
+ )
290
+ )
291
+ else:
292
+ raise ValueError(f"Unknown Segmentation Head {task}")
293
+
294
+ elif task == "classification":
295
+ # Gets multi scale input
296
+ layers.append(
297
+ ClassificationHead(
298
+ self.num_multi_scale_features,
299
+ [128, 256, 512, 1024],
300
+ num_outputs=num_classes,
301
+ dropout_rate=dropout_rate,
302
+ )
303
+ )
304
+ self.layers = nn.Sequential(*layers)
305
+
306
+ def forward(self, x):
307
+ return self.layers(x)
308
+
309
+
310
+ class SimpleBottleneckBlock(nn.Module):
311
+ expansion = 4
312
+
313
+ def __init__(self, inplanes, planes, stride=1):
314
+ super(SimpleBottleneckBlock, self).__init__()
315
+ bn_mom = 0.1
316
+ bn_eps = 1e-5
317
+
318
+ self.downsample = None
319
+ if stride != 1 or inplanes != planes * self.expansion:
320
+ self.downsample = nn.Sequential(
321
+ nn.Conv2d(
322
+ inplanes,
323
+ planes * self.expansion,
324
+ kernel_size=1,
325
+ stride=stride,
326
+ bias=False,
327
+ ),
328
+ nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom),
329
+ )
330
+
331
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
332
+ self.bn1 = nn.BatchNorm2d(planes, momentum=bn_mom)
333
+ self.conv2 = nn.Conv2d(
334
+ planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
335
+ )
336
+ self.bn2 = nn.BatchNorm2d(planes, momentum=bn_mom)
337
+ self.conv3 = nn.Conv2d(
338
+ planes, planes * self.expansion, kernel_size=1, bias=False
339
+ )
340
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom)
341
+ self.relu = nn.ReLU(inplace=True)
342
+ self.stride = stride
343
+
344
+ def forward(self, x):
345
+ residual = x
346
+
347
+ out = self.conv1(x)
348
+ out = self.bn1(out)
349
+ out = self.relu(out)
350
+
351
+ out = self.conv2(out)
352
+ out = self.bn2(out)
353
+ out = self.relu(out)
354
+
355
+ out = self.conv3(out)
356
+ out = self.bn3(out)
357
+
358
+ if self.downsample is not None:
359
+ residual = self.downsample(x)
360
+
361
+ out += residual
362
+ out = self.relu(out)
363
+
364
+ return out
365
+
366
+
367
+ class ClassificationHead(nn.Module):
368
+ def __init__(
369
+ self,
370
+ pre_head_channels,
371
+ head_channels=[128, 256, 512, 1024],
372
+ num_outputs=1,
373
+ dropout_rate=None,
374
+ ):
375
+ super(ClassificationHead, self).__init__()
376
+
377
+ self.dropout_rate = dropout_rate
378
+ bn_mom = 0.1
379
+ bn_eps = 1e-5
380
+ head_block_type = SimpleBottleneckBlock
381
+ head_expansion = 4
382
+
383
+ expansion_layers = []
384
+ for i, pre_head_channel in enumerate(pre_head_channels):
385
+ expansion_layer = head_block_type(
386
+ pre_head_channel,
387
+ int(head_channels[i] / head_expansion),
388
+ )
389
+ expansion_layers.append(expansion_layer)
390
+ self.expansion_layers = nn.ModuleList(expansion_layers)
391
+
392
+ # downsampling modules
393
+ downsampling_layers = []
394
+ for i in range(len(pre_head_channels) - 1):
395
+ input_channels = head_channels[i]
396
+ output_channels = head_channels[i + 1]
397
+
398
+ downsampling_layer = nn.Sequential(
399
+ nn.Conv2d(
400
+ in_channels=input_channels,
401
+ out_channels=output_channels,
402
+ kernel_size=3,
403
+ stride=2,
404
+ padding=1,
405
+ ),
406
+ nn.BatchNorm2d(output_channels, momentum=bn_mom),
407
+ nn.ReLU(),
408
+ )
409
+
410
+ downsampling_layers.append(downsampling_layer)
411
+ self.downsampling_layers = nn.ModuleList(downsampling_layers)
412
+
413
+ self.final_layer = nn.Sequential(
414
+ nn.Conv2d(
415
+ in_channels=head_channels[-1],
416
+ out_channels=2048,
417
+ kernel_size=1,
418
+ stride=1,
419
+ padding=0,
420
+ ),
421
+ nn.BatchNorm2d(2048, momentum=bn_mom),
422
+ nn.ReLU(inplace=True),
423
+ )
424
+
425
+ self.adaptive_avg_pool = nn.AdaptiveAvgPool2d(1)
426
+ self.classifier = nn.Linear(
427
+ 2048,
428
+ num_outputs,
429
+ )
430
+
431
+ def forward(self, x):
432
+
433
+ next_x = self.expansion_layers[0](x[0])
434
+ for i in range(len(self.downsampling_layers)):
435
+ next_x = self.expansion_layers[i + 1](x[i + 1]) + self.downsampling_layers[
436
+ i
437
+ ](next_x)
438
+ x = next_x
439
+
440
+ x = self.final_layer(x)
441
+ x = self.adaptive_avg_pool(x).squeeze()
442
+
443
+ if self.dropout_rate:
444
+ x = torch.nn.functional.dropout(
445
+ x, p=self.dropout_rate, training=self.training
446
+ )
447
+
448
+ x = self.classifier(x)
449
+ return x
450
+
451
+
452
+ class SegmentationHead_NoSigmoid_3x3(nn.Module):
453
+ def __init__(
454
+ self, backbone_channels, mid_channels=256, kernel_size=3, num_outputs=1
455
+ ):
456
+ super(SegmentationHead_NoSigmoid_3x3, self).__init__()
457
+ last_inp_channels = backbone_channels
458
+ self.last_layer = nn.Sequential(
459
+ nn.Conv2d(
460
+ in_channels=last_inp_channels,
461
+ out_channels=mid_channels,
462
+ kernel_size=kernel_size,
463
+ stride=1,
464
+ padding=kernel_size // 2,
465
+ ),
466
+ nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM),
467
+ nn.ReLU(inplace=relu_inplace),
468
+ nn.Conv2d(
469
+ in_channels=mid_channels,
470
+ out_channels=num_outputs,
471
+ kernel_size=kernel_size,
472
+ stride=1,
473
+ padding=kernel_size // 2,
474
+ ),
475
+ )
476
+
477
+ def forward(self, x):
478
+ x = self.last_layer(x)
479
+ return x
480
+
481
+
482
+ class SegmentationHead_NoSigmoid_1x1(nn.Module):
483
+ def __init__(
484
+ self, backbone_channels, mid_channels=512, kernel_size=3, num_outputs=1
485
+ ):
486
+ super(SegmentationHead_NoSigmoid_1x1, self).__init__()
487
+ last_inp_channels = backbone_channels
488
+ self.last_layer = nn.Sequential(
489
+ nn.Conv2d(
490
+ in_channels=last_inp_channels,
491
+ out_channels=mid_channels,
492
+ kernel_size=kernel_size,
493
+ stride=1,
494
+ padding=kernel_size // 2,
495
+ ),
496
+ nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM),
497
+ nn.ReLU(inplace=relu_inplace),
498
+ nn.Conv2d(
499
+ in_channels=mid_channels,
500
+ out_channels=num_outputs,
501
+ kernel_size=1,
502
+ stride=1,
503
+ padding=0,
504
+ ),
505
+ )
506
+
507
+ def forward(self, x):
508
+ x = self.last_layer(x)
509
+ return x
510
+
511
+
512
+ class GaussianConv2D(nn.Module):
513
+ """
514
+ Gaussian smoothing + downsampling, applied independently per channel
515
+ THIS IS NOT MEANT FOR USE ON MOBILE. MIGHT BE HORRIBLY SLOW
516
+ """
517
+
518
+ def __init__(self, channels, kernel_size, sigma, stride=1):
519
+ super(GaussianConv2D, self).__init__()
520
+ assert isinstance(
521
+ kernel_size, int
522
+ ), "Specify kernel size as int. Both dimensions will get the same kernel size"
523
+ assert isinstance(sigma, float), "Specify sigma as float. Isotropic gaussian"
524
+
525
+ kernel = torch.zeros(kernel_size, kernel_size)
526
+ mean_loc = int((kernel_size - 1) / 2) # Because 0 indexed
527
+ kernel[mean_loc, mean_loc] = 1
528
+ kernel = torch.from_numpy(ndimage.gaussian_filter(kernel.numpy(), sigma=sigma))
529
+
530
+ # Make a dwise conv out of the kernel
531
+ # Weights of shape out_channels, in_channels/groups, k, k
532
+ kernel = kernel.view(1, 1, kernel_size, kernel_size)
533
+ kernel = kernel.repeat(channels, 1, 1, 1)
534
+
535
+ self.conv = F.conv2d
536
+ # Register the kernel buffer instead of as a parameter, so that the training doesn't
537
+ # happily update it
538
+ self.register_buffer("weight", kernel)
539
+ self.channels = channels
540
+ self.stride = stride
541
+
542
+ def forward(self, input):
543
+ return self.conv(
544
+ input, weight=self.weight, groups=self.channels, stride=self.stride
545
+ )
546
+
547
+
548
+ class FFNet(nn.Module):
549
+ def __init__(
550
+ self,
551
+ ffnet_head_type="A",
552
+ num_classes=19,
553
+ task="segmentation_A",
554
+ use_adapter_convs=True,
555
+ backbone=None,
556
+ pre_downsampling=False,
557
+ model_name="default",
558
+ dropout_rate=None,
559
+ **kwargs,
560
+ ):
561
+ super(FFNet, self).__init__()
562
+ self.backbone_model = backbone()
563
+ branch_chans = self.backbone_model.out_channels
564
+ self.use_adapter_convs = use_adapter_convs
565
+ self.ffnet_head_type = ffnet_head_type
566
+ self.task = task
567
+ self.head = FFNetUpHead(
568
+ branch_chans,
569
+ use_adapter_conv=use_adapter_convs,
570
+ head_type=ffnet_head_type,
571
+ num_classes=num_classes,
572
+ task=task,
573
+ dropout_rate=dropout_rate,
574
+ )
575
+ self.model_name = model_name
576
+ # Pre-downsampling is used while training models that use 1024x512 image sizes rather than 2048x1024.
577
+ self.pre_downsampling = pre_downsampling
578
+ if self.pre_downsampling:
579
+ self.smoothing = GaussianConv2D(
580
+ channels=3, kernel_size=5, sigma=0.7, stride=2
581
+ )
582
+
583
+ def forward(self, x):
584
+ if self.pre_downsampling:
585
+ x = self.smooth_and_downsample_input(x)
586
+ x = self.backbone_model(x)
587
+ return self.head(x)
588
+
589
+ def smooth_and_downsample_input(self, x):
590
+ x = F.pad(x, (0, 0, 1, 1), mode="reflect")
591
+ return self.smoothing(x)
592
+
593
+ def init_model(
594
+ self, pretrained_path=None, strict_loading=True, backbone_only=False
595
+ ):
596
+ print(f"Initializing {self.model_name} weights")
597
+ self.apply(model_weight_initializer)
598
+ if pretrained_path:
599
+ pretrained_dict = torch.load(
600
+ pretrained_path, map_location={"cuda:0": "cpu"}
601
+ )
602
+ if backbone_only:
603
+ backbone_dict = {}
604
+ for k, v in pretrained_dict.items():
605
+ if k.startswith("backbone_model"):
606
+ backbone_dict[k] = v
607
+ self.load_state_dict(backbone_dict, strict=strict_loading)
608
+ else:
609
+ self.load_state_dict(pretrained_dict, strict=strict_loading)
610
+ else:
611
+ self.backbone_model.load_weights()
612
+
613
+
614
+ def create_ffnet(
615
+ pretrained=True,
616
+ imagenet_backbone_pretrained=True,
617
+ pretrained_weights_path=None,
618
+ pretrained_backbone_only=False,
619
+ ffnet_head_type="A",
620
+ strict_loading=True,
621
+ num_classes=19,
622
+ task="segmentation_A",
623
+ model_name="ffnnet122NS_CCC",
624
+ backbone=None,
625
+ pre_downsampling=False,
626
+ dropout_rate=None,
627
+ **kwargs,
628
+ ):
629
+
630
+ if pretrained_weights_path:
631
+ model_wghts = pretrained_weights_path
632
+ pretrained = True
633
+ if imagenet_backbone_pretrained:
634
+ pretrained = True
635
+
636
+ model = FFNet(
637
+ ffnet_head_type=ffnet_head_type,
638
+ num_classes=num_classes,
639
+ task=task,
640
+ use_adapter_convs=True,
641
+ backbone=backbone,
642
+ pre_downsampling=pre_downsampling,
643
+ model_name=model_name,
644
+ dropout_rate=dropout_rate,
645
+ )
646
+
647
+ model.apply(model_weight_initializer)
648
+ if pretrained:
649
+ if pretrained_weights_path:
650
+ print("Loading pretrained model state dict from {}".format(model_wghts))
651
+ model.init_model(
652
+ model_wghts,
653
+ strict_loading=strict_loading,
654
+ backbone_only=pretrained_backbone_only,
655
+ )
656
+ else:
657
+ print(
658
+ "No model weights provided, attempting to load imagenet pretrained backbone..."
659
+ )
660
+ model.init_model()
661
+
662
+ model.eval()
663
+ return model
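For orientation, here is a minimal, hedged sketch of driving `create_ffnet` directly. The backbone choice, model name, and input size below are illustrative assumptions, and no checkpoint is needed because `init_model()` simply reports that no backbone weights were loaded.

```python
# Illustrative sketch only, assuming this repo's models/ package is importable.
import torch

from models import resnet
from models.ffnet_blocks import create_ffnet

model = create_ffnet(
    ffnet_head_type="A",
    task="segmentation_A",
    num_classes=19,
    model_name="ffnet34_AAA_demo",  # hypothetical name, used only in log messages
    backbone=resnet.Resnet34,
)
with torch.no_grad():
    logits = model(torch.randn(1, 3, 1024, 2048))  # NCHW, a 2048x1024 image
print(logits.shape)  # 19-channel logits at roughly 1/4 of the input resolution
```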
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py ADDED
@@ -0,0 +1,235 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ import os
5
+ from functools import partial
6
+
7
+ import torch
8
+
9
+
10
+ from models import resnet
11
+
12
+ import os
13
+ import sys
14
+ import numpy as np
15
+
16
+ import torch.nn as nn
17
+ import torch._utils
18
+ import torch.nn.functional as F
19
+
20
+ from models.ffnet_blocks import create_ffnet
21
+ from models.model_registry import register_model
22
+ from config import model_weights_base_path
23
+
24
+
25
+ ##########################################################################################
26
+ ##### 4-Stage GPU FFNets with ResNet backbone.
27
+ ##### These are trained for use with image sizes of 2048x1024
28
+ ##### and output a segmentation map of 512x256 pixels
29
+ ##########################################################################################
30
+ @register_model
31
+ def segmentation_ffnet150_AAA():
32
+ return create_ffnet(
33
+ ffnet_head_type="A",
34
+ task="segmentation_A",
35
+ num_classes=19,
36
+ model_name="ffnnet150_AAA",
37
+ backbone=resnet.Resnet150,
38
+ pre_downsampling=False,
39
+ pretrained_weights_path=os.path.join(
40
+ model_weights_base_path,
41
+ "ffnet150/ffnet150_AAA_cityscapes_state_dict_quarts.pth",
42
+ ),
43
+ strict_loading=True,
44
+ )
45
+
46
+
47
+ @register_model
48
+ def segmentation_ffnet134_AAA():
49
+ return create_ffnet(
50
+ ffnet_head_type="A",
51
+ task="segmentation_A",
52
+ num_classes=19,
53
+ model_name="ffnnet134_AAA",
54
+ backbone=resnet.Resnet134,
55
+ pre_downsampling=False,
56
+ pretrained_weights_path=os.path.join(
57
+ model_weights_base_path,
58
+ "ffnet134/ffnet134_AAA_cityscapes_state_dict_quarts.pth",
59
+ ),
60
+ strict_loading=True,
61
+ )
62
+
63
+
64
+ @register_model
65
+ def segmentation_ffnet101_AAA():
66
+ return create_ffnet(
67
+ ffnet_head_type="A",
68
+ task="segmentation_A",
69
+ num_classes=19,
70
+ model_name="ffnnet101_AAA",
71
+ backbone=resnet.Resnet101,
72
+ pre_downsampling=False,
73
+ pretrained_weights_path=os.path.join(
74
+ model_weights_base_path,
75
+ "ffnet101/ffnet101_AAA_cityscapes_state_dict_quarts.pth",
76
+ ),
77
+ strict_loading=True,
78
+ )
79
+
80
+
81
+ @register_model
82
+ def segmentation_ffnet86_AAA():
83
+ return create_ffnet(
84
+ ffnet_head_type="A",
85
+ task="segmentation_A",
86
+ num_classes=19,
87
+ model_name="ffnnet86_AAA",
88
+ backbone=resnet.Resnet86,
89
+ pre_downsampling=False,
90
+ pretrained_weights_path=os.path.join(
91
+ model_weights_base_path,
92
+ "ffnet86/ffnet86_AAA_cityscapes_state_dict_quarts.pth",
93
+ ),
94
+ strict_loading=True,
95
+ )
96
+
97
+
98
+ @register_model
99
+ def segmentation_ffnet56_AAA():
100
+ return create_ffnet(
101
+ ffnet_head_type="A",
102
+ task="segmentation_A",
103
+ num_classes=19,
104
+ model_name="ffnnet56_AAA",
105
+ backbone=resnet.Resnet56,
106
+ pre_downsampling=False,
107
+ pretrained_weights_path=os.path.join(
108
+ model_weights_base_path,
109
+ "ffnet56/ffnet56_AAA_cityscapes_state_dict_quarts.pth",
110
+ ),
111
+ strict_loading=True,
112
+ )
113
+
114
+
115
+ @register_model
116
+ def segmentation_ffnet50_AAA():
117
+ return create_ffnet(
118
+ ffnet_head_type="A",
119
+ task="segmentation_A",
120
+ num_classes=19,
121
+ model_name="ffnnet50_AAA",
122
+ backbone=resnet.Resnet50,
123
+ pre_downsampling=False,
124
+ pretrained_weights_path=os.path.join(
125
+ model_weights_base_path,
126
+ "ffnet50/ffnet50_AAA_cityscapes_state_dict_quarts.pth",
127
+ ),
128
+ strict_loading=True,
129
+ )
130
+
131
+
132
+ @register_model
133
+ def segmentation_ffnet34_AAA():
134
+ return create_ffnet(
135
+ ffnet_head_type="A",
136
+ task="segmentation_A",
137
+ num_classes=19,
138
+ model_name="ffnnet34_AAA",
139
+ backbone=resnet.Resnet34,
140
+ pre_downsampling=False,
141
+ pretrained_weights_path=os.path.join(
142
+ model_weights_base_path,
143
+ "ffnet34/ffnet34_AAA_cityscapes_state_dict_quarts.pth",
144
+ ),
145
+ strict_loading=True,
146
+ )
147
+
148
+
149
+ @register_model
150
+ def segmentation_ffnet150_ABB():
151
+ return create_ffnet(
152
+ ffnet_head_type="B",
153
+ task="segmentation_B",
154
+ num_classes=19,
155
+ model_name="ffnnet150_ABB",
156
+ backbone=resnet.Resnet150,
157
+ pre_downsampling=False,
158
+ pretrained_weights_path=os.path.join(
159
+ model_weights_base_path,
160
+ "ffnet150/ffnet150_ABB_cityscapes_state_dict_quarts.pth",
161
+ ),
162
+ strict_loading=True,
163
+ )
164
+
165
+
166
+ @register_model
167
+ def segmentation_ffnet86_ABB():
168
+ return create_ffnet(
169
+ ffnet_head_type="B",
170
+ task="segmentation_B",
171
+ num_classes=19,
172
+ model_name="ffnnet86_ABB",
173
+ backbone=resnet.Resnet86,
174
+ pre_downsampling=False,
175
+ pretrained_weights_path=os.path.join(
176
+ model_weights_base_path,
177
+ "ffnet86/ffnet86_ABB_cityscapes_state_dict_quarts.pth",
178
+ ),
179
+ strict_loading=True,
180
+ )
181
+
182
+
183
+ @register_model
184
+ def segmentation_ffnet56_ABB():
185
+ return create_ffnet(
186
+ ffnet_head_type="B",
187
+ task="segmentation_B",
188
+ num_classes=19,
189
+ model_name="ffnnet56_ABB",
190
+ backbone=resnet.Resnet56,
191
+ pre_downsampling=False,
192
+ pretrained_weights_path=os.path.join(
193
+ model_weights_base_path,
194
+ "ffnet56/ffnet56_ABB_cityscapes_state_dict_quarts.pth",
195
+ ),
196
+ strict_loading=True,
197
+ )
198
+
199
+
200
+ @register_model
201
+ def segmentation_ffnet34_ABB():
202
+ return create_ffnet(
203
+ ffnet_head_type="B",
204
+ task="segmentation_B",
205
+ num_classes=19,
206
+ model_name="ffnnet34_ABB",
207
+ backbone=resnet.Resnet34,
208
+ pre_downsampling=False,
209
+ pretrained_weights_path=os.path.join(
210
+ model_weights_base_path,
211
+ "ffnet34/ffnet34_ABB_cityscapes_state_dict_quarts.pth",
212
+ ),
213
+ strict_loading=True,
214
+ )
215
+
216
+
217
+ ##########################################################################################
218
+ ##### This is an example of how these FFNet models would be initialized for training on
219
+ ##### cityscapes with 2048x1024 images
220
+ ##########################################################################################
221
+ @register_model
222
+ def segmentation_ffnet150_AAA_train():
223
+ return create_ffnet(
224
+ ffnet_head_type="A",
225
+ task="segmentation_A",
226
+ num_classes=19,
227
+ model_name="ffnnet150_AAA",
228
+ backbone=resnet.Resnet150,
229
+ pretrained_weights_path=os.path.join(
230
+ model_weights_base_path,
231
+ "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth",
232
+ ),
233
+ pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
234
+ strict_loading=False,
235
+ )
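A hedged usage sketch for the entry points registered above; it assumes the corresponding Cityscapes checkpoint actually exists under `model_weights_base_path` (from `config.py`), otherwise `torch.load` will fail.

```python
# Sketch only: needs ffnet86/ffnet86_AAA_cityscapes_state_dict_quarts.pth under
# model_weights_base_path; importing the module is what populates the registry.
import torch

import models.ffnet_gpu_large  # noqa: F401  (registers the entry points above)
from models.model_registry import model_entrypoint

model = model_entrypoint("segmentation_ffnet86_AAA")()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 1024, 2048))  # NCHW, 2048x1024 as noted above
print(logits.shape)  # expected 1 x 19 x 256 x 512, i.e. the 512x256 map mentioned above
```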
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py ADDED
@@ -0,0 +1,385 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ import os
5
+ from functools import partial
6
+
7
+ import torch
8
+
9
+
10
+ from models import resnet
11
+
12
+ import os
13
+ import sys
14
+ import numpy as np
15
+
16
+ import torch.nn as nn
17
+ import torch._utils
18
+ import torch.nn.functional as F
19
+
20
+ from models.ffnet_blocks import create_ffnet
21
+ from models.model_registry import register_model
22
+ from config import model_weights_base_path
23
+
24
+
25
+ ##########################################################################################
26
+ ##### 4-Stage GPU FFNets with ResNet backbone.
27
+ ##### These are trained for use with image sizes of 2048x1024
28
+ ##### and output a segmentation map of 256x128 pixels
29
+ ##########################################################################################
30
+ @register_model
31
+ def segmentation_ffnet150_dAAA():
32
+ return create_ffnet(
33
+ ffnet_head_type="A",
34
+ task="segmentation_A",
35
+ num_classes=19,
36
+ model_name="ffnnet150_dAAA",
37
+ backbone=resnet.Resnet150_D,
38
+ pre_downsampling=False,
39
+ pretrained_weights_path=os.path.join(
40
+ model_weights_base_path,
41
+ "ffnet150/ffnet150_dAAA_cityscapes_state_dict_quarts.pth",
42
+ ),
43
+ strict_loading=True,
44
+ )
45
+
46
+
47
+ @register_model
48
+ def segmentation_ffnet134_dAAA():
49
+ return create_ffnet(
50
+ ffnet_head_type="A",
51
+ task="segmentation_A",
52
+ num_classes=19,
53
+ model_name="ffnnet134_dAAA",
54
+ backbone=resnet.Resnet134_D,
55
+ pre_downsampling=False,
56
+ pretrained_weights_path=os.path.join(
57
+ model_weights_base_path,
58
+ "ffnet134/ffnet134_dAAA_cityscapes_state_dict_quarts.pth",
59
+ ),
60
+ strict_loading=True,
61
+ )
62
+
63
+
64
+ @register_model
65
+ def segmentation_ffnet101_dAAA():
66
+ return create_ffnet(
67
+ ffnet_head_type="A",
68
+ task="segmentation_A",
69
+ num_classes=19,
70
+ model_name="ffnnet101_dAAA",
71
+ backbone=resnet.Resnet101_D,
72
+ pre_downsampling=False,
73
+ pretrained_weights_path=os.path.join(
74
+ model_weights_base_path,
75
+ "ffnet101/ffnet101_dAAA_cityscapes_state_dict_quarts.pth",
76
+ ),
77
+ strict_loading=True,
78
+ )
79
+
80
+
81
+ @register_model
82
+ def segmentation_ffnet86_dAAA():
83
+ return create_ffnet(
84
+ ffnet_head_type="A",
85
+ task="segmentation_A",
86
+ num_classes=19,
87
+ model_name="ffnnet86_dAAA",
88
+ backbone=resnet.Resnet86_D,
89
+ pre_downsampling=False,
90
+ pretrained_weights_path=os.path.join(
91
+ model_weights_base_path,
92
+ "ffnet86/ffnet86_dAAA_cityscapes_state_dict_quarts.pth",
93
+ ),
94
+ strict_loading=True,
95
+ )
96
+
97
+
98
+ @register_model
99
+ def segmentation_ffnet56_dAAA():
100
+ return create_ffnet(
101
+ ffnet_head_type="A",
102
+ task="segmentation_A",
103
+ num_classes=19,
104
+ model_name="ffnnet56_dAAA",
105
+ backbone=resnet.Resnet56_D,
106
+ pre_downsampling=False,
107
+ pretrained_weights_path=os.path.join(
108
+ model_weights_base_path,
109
+ "ffnet56/ffnet56_dAAA_cityscapes_state_dict_quarts.pth",
110
+ ),
111
+ strict_loading=True,
112
+ )
113
+
114
+
115
+ @register_model
116
+ def segmentation_ffnet50_dAAA():
117
+ return create_ffnet(
118
+ ffnet_head_type="A",
119
+ task="segmentation_A",
120
+ num_classes=19,
121
+ model_name="ffnnet50_dAAA",
122
+ backbone=resnet.Resnet50_D,
123
+ pre_downsampling=False,
124
+ pretrained_weights_path=os.path.join(
125
+ model_weights_base_path,
126
+ "ffnet50/ffnet50_dAAA_cityscapes_state_dict_quarts.pth",
127
+ ),
128
+ strict_loading=True,
129
+ )
130
+
131
+
132
+ @register_model
133
+ def segmentation_ffnet34_dAAA():
134
+ return create_ffnet(
135
+ ffnet_head_type="A",
136
+ task="segmentation_A",
137
+ num_classes=19,
138
+ model_name="ffnnet34_dAAA",
139
+ backbone=resnet.Resnet34_D,
140
+ pre_downsampling=False,
141
+ pretrained_weights_path=os.path.join(
142
+ model_weights_base_path,
143
+ "ffnet34/ffnet34_dAAA_cityscapes_state_dict_quarts.pth",
144
+ ),
145
+ strict_loading=True,
146
+ )
147
+
148
+
149
+ @register_model
150
+ def segmentation_ffnet18_dAAA():
151
+ return create_ffnet(
152
+ ffnet_head_type="A",
153
+ task="segmentation_A",
154
+ num_classes=19,
155
+ model_name="ffnnet18_dAAA",
156
+ backbone=resnet.Resnet18_D,
157
+ pre_downsampling=False,
158
+ pretrained_weights_path=os.path.join(
159
+ model_weights_base_path,
160
+ "ffnet18/ffnet18_dAAA_cityscapes_state_dict_quarts.pth",
161
+ ),
162
+ strict_loading=True,
163
+ )
164
+
165
+
166
+ @register_model
167
+ def segmentation_ffnet150_dAAC():
168
+ return create_ffnet(
169
+ ffnet_head_type="A",
170
+ task="segmentation_C",
171
+ num_classes=19,
172
+ model_name="ffnnet150_dAAC",
173
+ backbone=resnet.Resnet150_D,
174
+ pre_downsampling=False,
175
+ pretrained_weights_path=os.path.join(
176
+ model_weights_base_path,
177
+ "ffnet150/ffnet150_dAAC_cityscapes_state_dict_quarts.pth",
178
+ ),
179
+ strict_loading=True,
180
+ )
181
+
182
+
183
+ @register_model
184
+ def segmentation_ffnet86_dAAC():
185
+ return create_ffnet(
186
+ ffnet_head_type="A",
187
+ task="segmentation_C",
188
+ num_classes=19,
189
+ model_name="ffnnet86_dAAC",
190
+ backbone=resnet.Resnet86_D,
191
+ pre_downsampling=False,
192
+ pretrained_weights_path=os.path.join(
193
+ model_weights_base_path,
194
+ "ffnet86/ffnet86_dAAC_cityscapes_state_dict_quarts.pth",
195
+ ),
196
+ strict_loading=True,
197
+ )
198
+
199
+
200
+ @register_model
201
+ def segmentation_ffnet34_dAAC():
202
+ return create_ffnet(
203
+ ffnet_head_type="A",
204
+ task="segmentation_C",
205
+ num_classes=19,
206
+ model_name="ffnnet34_dAAC",
207
+ backbone=resnet.Resnet34_D,
208
+ pre_downsampling=False,
209
+ pretrained_weights_path=os.path.join(
210
+ model_weights_base_path,
211
+ "ffnet34/ffnet34_dAAC_cityscapes_state_dict_quarts.pth",
212
+ ),
213
+ strict_loading=True,
214
+ )
215
+
216
+
217
+ @register_model
218
+ def segmentation_ffnet18_dAAC():
219
+ return create_ffnet(
220
+ ffnet_head_type="A",
221
+ task="segmentation_C",
222
+ num_classes=19,
223
+ model_name="ffnnet18_dAAC",
224
+ backbone=resnet.Resnet18_D,
225
+ pre_downsampling=False,
226
+ pretrained_weights_path=os.path.join(
227
+ model_weights_base_path,
228
+ "ffnet18/ffnet18_dAAC_cityscapes_state_dict_quarts.pth",
229
+ ),
230
+ strict_loading=True,
231
+ )
232
+
233
+
234
+ ##########################################################################################
235
+ ##### Classification models with an FFNet structure. Primarily intended for imagenet
236
+ ##### initialization of FFNet.
237
+ ##### See the README for the hyperparameters for training the classification models
238
+ ##########################################################################################
239
+ @register_model
240
+ def classification_ffnet150_AAX():
241
+ return create_ffnet(
242
+ ffnet_head_type="A",
243
+ task="classification",
244
+ num_classes=1000,
245
+ model_name="ffnnet150_AAX",
246
+ backbone=resnet.Resnet150,
247
+ pretrained_weights_path=os.path.join(
248
+ model_weights_base_path,
249
+ "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth",
250
+ ),
251
+ strict_loading=True,
252
+ )
253
+
254
+
255
+ @register_model
256
+ def classification_ffnet134_AAX():
257
+ return create_ffnet(
258
+ ffnet_head_type="A",
259
+ task="classification",
260
+ num_classes=1000,
261
+ model_name="ffnnet134_AAX",
262
+ backbone=resnet.Resnet134,
263
+ pretrained_weights_path=os.path.join(
264
+ model_weights_base_path,
265
+ "ffnet134/ffnet134_AAX_imagenet_state_dict_quarts.pth",
266
+ ),
267
+ strict_loading=True,
268
+ )
269
+
270
+
271
+ @register_model
272
+ def classification_ffnet101_AAX():
273
+ return create_ffnet(
274
+ ffnet_head_type="A",
275
+ task="classification",
276
+ num_classes=1000,
277
+ model_name="ffnnet101_AAX",
278
+ backbone=resnet.Resnet101,
279
+ pretrained_weights_path=os.path.join(
280
+ model_weights_base_path,
281
+ "ffnet101/ffnet101_AAX_imagenet_state_dict_quarts.pth",
282
+ ),
283
+ strict_loading=True,
284
+ )
285
+
286
+
287
+ @register_model
288
+ def classification_ffnet86_AAX():
289
+ return create_ffnet(
290
+ ffnet_head_type="A",
291
+ task="classification",
292
+ num_classes=1000,
293
+ model_name="ffnnet86_AAX",
294
+ backbone=resnet.Resnet86,
295
+ pretrained_weights_path=os.path.join(
296
+ model_weights_base_path,
297
+ "ffnet86/ffnet86_AAX_imagenet_state_dict_quarts.pth",
298
+ ),
299
+ strict_loading=True,
300
+ )
301
+
302
+
303
+ @register_model
304
+ def classification_ffnet56_AAX():
305
+ return create_ffnet(
306
+ ffnet_head_type="A",
307
+ task="classification",
308
+ num_classes=1000,
309
+ model_name="ffnnet56_AAX",
310
+ backbone=resnet.Resnet56,
311
+ pretrained_weights_path=os.path.join(
312
+ model_weights_base_path,
313
+ "ffnet56/ffnet56_AAX_imagenet_state_dict_quarts.pth",
314
+ ),
315
+ strict_loading=True,
316
+ )
317
+
318
+
319
+ @register_model
320
+ def classification_ffnet50_AAX():
321
+ return create_ffnet(
322
+ ffnet_head_type="A",
323
+ task="classification",
324
+ num_classes=1000,
325
+ model_name="ffnnet50_AAX",
326
+ backbone=resnet.Resnet50,
327
+ pretrained_weights_path=os.path.join(
328
+ model_weights_base_path,
329
+ "ffnet50/ffnet50_AAX_imagenet_state_dict_quarts.pth",
330
+ ),
331
+ strict_loading=True,
332
+ )
333
+
334
+
335
+ @register_model
336
+ def classification_ffnet34_AAX():
337
+ return create_ffnet(
338
+ ffnet_head_type="A",
339
+ task="classification",
340
+ num_classes=1000,
341
+ model_name="ffnnet34_AAX",
342
+ backbone=resnet.Resnet34,
343
+ pretrained_weights_path=os.path.join(
344
+ model_weights_base_path,
345
+ "ffnet34/ffnet34_AAX_imagenet_state_dict_quarts.pth",
346
+ ),
347
+ strict_loading=True,
348
+ )
349
+
350
+
351
+ @register_model
352
+ def classification_ffnet18_AAX():
353
+ return create_ffnet(
354
+ ffnet_head_type="A",
355
+ task="classification",
356
+ num_classes=1000,
357
+ model_name="ffnnet18_AAX",
358
+ backbone=resnet.Resnet18,
359
+ pretrained_weights_path=os.path.join(
360
+ model_weights_base_path,
361
+ "ffnet18/ffnet18_AAX_imagenet_state_dict_quarts.pth",
362
+ ),
363
+ strict_loading=True,
364
+ )
365
+
366
+
367
+ ##########################################################################################
368
+ ##### This is an example of how these FFNet models would be initialized for training on
369
+ ##### cityscapes with 2048x1024 images
370
+ ##########################################################################################
371
+ @register_model
372
+ def segmentation_ffnet150_dAAC_train():
373
+ return create_ffnet(
374
+ ffnet_head_type="A",
375
+ task="segmentation_C",
376
+ num_classes=19,
377
+ model_name="ffnnet150_dAAC",
378
+ backbone=resnet.Resnet150_D,
379
+ pretrained_weights_path=os.path.join(
380
+ model_weights_base_path,
381
+ "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth",
382
+ ),
383
+ pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch
384
+ strict_loading=False,
385
+ )
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py ADDED
@@ -0,0 +1,32 @@
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
+ # All Rights Reserved.
+
+ import sys
+
+ _model_entrypoints = {}
+
+
+ def register_model(fn):
+     # lookup containing module
+     mod = sys.modules[fn.__module__]
+     # add model to __all__ in module
+     model_name = fn.__name__
+     if hasattr(mod, "__all__"):
+         mod.__all__.append(model_name)
+     else:
+         mod.__all__ = [model_name]
+
+     # add entries to registry dict/sets
+     _model_entrypoints[model_name] = fn
+     return fn
+
+
+ def model_entrypoint(model_name):
+     """Fetch a model entrypoint for specified model name"""
+     if model_name in _model_entrypoints:
+         return _model_entrypoints[model_name]
+     else:
+         raise RuntimeError(
+             f"Unknown model ({model_name}); known models are: "
+             f"{_model_entrypoints.keys()}"
+         )
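The registry contract above is easiest to see with a toy entry point; the function below is purely hypothetical and not part of the repo.

```python
# Minimal illustration of register_model / model_entrypoint.
from models.model_registry import register_model, model_entrypoint

@register_model
def my_demo_model():  # hypothetical entry point; the function name becomes the lookup key
    return "built"

assert model_entrypoint("my_demo_model")() == "built"
# Looking up an unregistered name raises RuntimeError listing the known entry points.
```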
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py ADDED
@@ -0,0 +1,593 @@
1
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
2
+ # All Rights Reserved.
3
+
4
+ #########################################################################
5
+ # Code adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py
6
+
7
+ # The original source code was made available under the following license
8
+ # BSD 3-Clause License
9
+ #
10
+ # Copyright (c) Soumith Chintala 2016,
11
+ # All rights reserved.
12
+ #
13
+ # Redistribution and use in source and binary forms, with or without
14
+ # modification, are permitted provided that the following conditions are met:
15
+ #
16
+ # * Redistributions of source code must retain the above copyright notice, this
17
+ # list of conditions and the following disclaimer.
18
+ #
19
+ # * Redistributions in binary form must reproduce the above copyright notice,
20
+ # this list of conditions and the following disclaimer in the documentation
21
+ # and/or other materials provided with the distribution.
22
+ #
23
+ # * Neither the name of the copyright holder nor the names of its
24
+ # contributors may be used to endorse or promote products derived from
25
+ # this software without specific prior written permission.
26
+ #
27
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
31
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
34
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
35
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
+ #########################################################################
38
+
39
+ #########################################################################
40
+ #### **The main takeaway is that simple FFNets made out of resnet backbones made using basic-block
41
+ #### **are just as competitive as complex architectures such as HRNet, DDRNet, FANet etc.
42
+
43
+ #### New and old ResNet backbones, designed for use with FFNet. These do not have a classification
44
+ #### head attached here. ImageNet training of these backbones is done as an FFNet with a classification
45
+ #### head attached. See ffnet.py and ffnet_blocks.py.
46
+ #### Also, these models do not make a distinction between GPU and mobile because the elements that we change
47
+ #### between the two are among the additional modules that FFNet adds.
48
+ #########################################################################
49
+ import torch
50
+
51
+ #### These are weights for the backbone when trained directly with a classification head attached at the end of the
52
+ #### backbone, and not as part of the FFNet structure. For a minor training accuracy advantage, one could use these
53
+ #### weights as the initialization for the relevant models in the new family of models,
54
+ #### but training from scratch works nearly equally well
55
+ model_paths = {
56
+ "resnet18": "/pretrained_weights/resnet18.pth",
57
+ "resnet34": "/pretrained_weights/resnet34.pth",
58
+ "resnet50": "/pretrained_weights/resnet50.pth",
59
+ "resnet101": "/pretrained_weights/resnet101.pth",
60
+ }
61
+
62
+ import torch.nn as nn
63
+ import torch._utils
64
+
65
+
66
+ BN_MOMENTUM = 0.1
67
+ relu_inplace = True
68
+
69
+
70
+ def conv3x3(in_planes, out_planes, stride=1):
71
+ """3x3 convolution with padding"""
72
+ return nn.Conv2d(
73
+ in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
74
+ )
75
+
76
+
77
+ def conv1x1(in_planes, out_planes, stride=1):
78
+ """1x1 convolution"""
79
+ return nn.Conv2d(
80
+ in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False
81
+ )
82
+
83
+
84
+ class BasicBlock(nn.Module):
85
+ expansion = 1
86
+
87
+ def __init__(self, in_chan, out_chan, stride=1):
88
+ super(BasicBlock, self).__init__()
89
+ self.conv1 = conv3x3(in_chan, out_chan, stride)
90
+ self.bn1 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM)
91
+ self.conv2 = conv3x3(out_chan, out_chan)
92
+ self.bn2 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM)
93
+ self.relu = nn.ReLU(inplace=relu_inplace)
94
+ self.downsample = None
95
+ if in_chan != out_chan or stride != 1:
96
+ self.downsample = nn.Sequential(
97
+ nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False),
98
+ nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM),
99
+ )
100
+
101
+ def forward(self, x):
102
+
103
+ out = self.conv1(x)
104
+ out = self.bn1(out)
105
+ out = self.relu(out)
106
+ out = self.conv2(out)
107
+ out = self.bn2(out)
108
+
109
+ shortcut = x
110
+ if self.downsample is not None:
111
+ shortcut = self.downsample(x)
112
+
113
+ out_ = shortcut + out
114
+ out_ = self.relu(out_)
115
+ return out_
116
+
117
+
118
+ class Bottleneck(nn.Module):
119
+ expansion = 4
120
+
121
+ def __init__(self, in_chan, out_chan, stride=1, base_width=64):
122
+ super(Bottleneck, self).__init__()
123
+ width = int(out_chan * (base_width / 64.0)) * 1
124
+ self.conv1 = conv1x1(in_chan, width)
125
+ self.bn1 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM)
126
+ self.conv2 = conv3x3(width, width, stride)
127
+ self.bn2 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM)
128
+ self.conv3 = conv1x1(width, out_chan * self.expansion)
129
+ self.bn3 = nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM)
130
+ self.relu = nn.ReLU(inplace=relu_inplace)
131
+ self.downsample = None
132
+ if in_chan != out_chan * self.expansion or stride != 1:
133
+ self.downsample = nn.Sequential(
134
+ nn.Conv2d(
135
+ in_chan,
136
+ out_chan * self.expansion,
137
+ kernel_size=1,
138
+ stride=stride,
139
+ bias=False,
140
+ ),
141
+ nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM),
142
+ )
143
+
144
+ def forward(self, x):
145
+
146
+ out = self.conv1(x)
147
+ out = self.bn1(out)
148
+ out = self.relu(out)
149
+
150
+ out = self.conv2(out)
151
+ out = self.bn2(out)
152
+ out = self.relu(out)
153
+
154
+ out = self.conv3(out)
155
+ out = self.bn3(out)
156
+
157
+ shortcut = x
158
+ if self.downsample is not None:
159
+ shortcut = self.downsample(x)
160
+
161
+ out_ = shortcut + out
162
+ out_ = self.relu(out_)
163
+
164
+ return out_
165
+
166
+
167
+ ##########################################################################################
168
+ ##### Vanilla ResNets, but with a more filled out model space, and primarily using basic blocks
169
+ ##########################################################################################
170
+
171
+
172
+ class ResNet(nn.Module):
173
+ def __init__(
174
+ self,
175
+ block,
176
+ layers,
177
+ strides,
178
+ pretrained_path=None,
179
+ branch_chans=[64, 128, 256, 512],
180
+ ):
181
+ super(ResNet, self).__init__()
182
+ self.pretrained_path = pretrained_path
183
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
184
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
185
+ self.relu = nn.ReLU(inplace=relu_inplace)
186
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
187
+ self.inplanes = 64
188
+ self.layer1 = self._make_layer(
189
+ block, branch_chans[0], bnum=layers[0], stride=strides[0]
190
+ )
191
+ self.layer2 = self._make_layer(
192
+ block, branch_chans[1], bnum=layers[1], stride=strides[1]
193
+ )
194
+ self.layer3 = self._make_layer(
195
+ block, branch_chans[2], bnum=layers[2], stride=strides[2]
196
+ )
197
+ self.layer4 = self._make_layer(
198
+ block, branch_chans[3], bnum=layers[3], stride=strides[3]
199
+ )
200
+ self.out_channels = [x * block.expansion for x in branch_chans]
201
+
202
+ def _make_layer(self, block, out_chan, bnum, stride=1):
203
+ layers = [block(self.inplanes, out_chan, stride=stride)]
204
+ self.inplanes = out_chan * block.expansion
205
+ for i in range(bnum - 1):
206
+ layers.append(block(self.inplanes, out_chan, stride=1))
207
+ return nn.Sequential(*layers)
208
+
209
+ def forward(self, x):
210
+ x = self.conv1(x)
211
+ x = self.relu(self.bn1(x))
212
+ x = self.maxpool(x)
213
+
214
+ feat4 = self.layer1(x)
215
+ feat8 = self.layer2(feat4) # 1/8
216
+ feat16 = self.layer3(feat8) # 1/16
217
+ feat32 = self.layer4(feat16) # 1/32
218
+ return feat4, feat8, feat16, feat32
219
+
220
+ def load_weights(self, pretrained_path=None):
221
+ if not pretrained_path:
222
+ pretrained_path = self.pretrained_path
223
+ if self.pretrained_path or pretrained_path:
224
+ pretrained_dict = torch.load(
225
+ pretrained_path, map_location={"cuda:0": "cpu"}
226
+ )
227
+ print(f"Loading backbone weights from {pretrained_path} with strict=False")
228
+ print(f"Caution!! Things could silently fail here")
229
+ self.load_state_dict(pretrained_dict, strict=False)
230
+ else:
231
+ print("No backbone weights loaded")
232
+
233
+
234
+ ##########################################################################################
235
+ ##### Vanilla ResNet instantiations
236
+ ##### The versions marked with _D are not trained on ImageNet, and use the weights from
237
+ ##### the respective models without a _D in the name
238
+ ##########################################################################################
239
+
240
+
241
+ def Resnet18_D(**kwargs):
242
+ model = ResNet(BasicBlock, [2, 2, 2, 2], [2, 2, 2, 2]) # , model_paths["resnet18"])
243
+ return model
244
+
245
+
246
+ def Resnet18(**kwargs):
247
+ model = ResNet(BasicBlock, [2, 2, 2, 2], [1, 2, 2, 2]) # , model_paths["resnet18"])
248
+ return model
249
+
250
+
251
+ def Resnet34_D(**kwargs):
252
+ model = ResNet(BasicBlock, [3, 4, 6, 3], [2, 2, 2, 2]) # , model_paths["resnet34"])
253
+ return model
254
+
255
+
256
+ def Resnet34(**kwargs):
257
+ model = ResNet(BasicBlock, [3, 4, 6, 3], [1, 2, 2, 2]) # , model_paths["resnet34"])
258
+ return model
259
+
260
+
261
+ def Resnet50_D(**kwargs):
262
+ model = ResNet(Bottleneck, [3, 4, 6, 3], [2, 2, 2, 2]) # , model_paths["resnet50"])
263
+ return model
264
+
265
+
266
+ def Resnet50(**kwargs):
267
+ model = ResNet(Bottleneck, [3, 4, 6, 3], [1, 2, 2, 2]) # , model_paths["resnet50"])
268
+ return model
269
+
270
+
271
+ # can use model_paths["resnet34"] to initialize the weights here, for instance
272
+ def Resnet56_D(**kwargs):
273
+ model = ResNet(BasicBlock, [4, 8, 12, 3], [2, 2, 2, 2])
274
+ return model
275
+
276
+
277
+ def Resnet56(**kwargs):
278
+ model = ResNet(BasicBlock, [4, 8, 12, 3], [1, 2, 2, 2])
279
+ return model
280
+
281
+
282
+ def Resnet86_D(**kwargs):
283
+ model = ResNet(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2])
284
+ return model
285
+
286
+
287
+ def Resnet86(**kwargs):
288
+ model = ResNet(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2])
289
+ return model
290
+
291
+
292
+ def Resnet101_D(**kwargs):
293
+ model = ResNet(
294
+ Bottleneck, [3, 4, 23, 3], [2, 2, 2, 2]
295
+ ) # , model_paths["resnet101"])
296
+ return model
297
+
298
+
299
+ def Resnet101(**kwargs):
300
+ model = ResNet(
301
+ Bottleneck, [3, 4, 23, 3], [1, 2, 2, 2]
302
+ ) # , model_paths["resnet101"])
303
+ return model
304
+
305
+
306
+ def Resnet134_D(**kwargs):
307
+ model = ResNet(BasicBlock, [8, 18, 28, 12], [2, 2, 2, 2])
308
+ return model
309
+
310
+
311
+ def Resnet134(**kwargs):
312
+ model = ResNet(BasicBlock, [8, 18, 28, 12], [1, 2, 2, 2])
313
+ return model
314
+
315
+
316
+ def Resnet150_D(**kwargs):
317
+ model = ResNet(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2])
318
+ return model
319
+
320
+
321
+ def Resnet150(**kwargs):
322
+ model = ResNet(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2])
323
+ return model
324
+
325
+
326
+ ##########################################################################################
327
+ ##### Slim ResNets. Narrower, with a deeper stem
328
+ ##########################################################################################
329
+
330
+
331
+ class ResNetS(nn.Module):
332
+ def __init__(
333
+ self,
334
+ block,
335
+ layers,
336
+ strides,
337
+ pretrained_path=None,
338
+ branch_chans=[64, 128, 192, 320],
339
+ ):
340
+ super(ResNetS, self).__init__()
341
+ self.pretrained_path = pretrained_path
342
+ self.conv0 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
343
+ self.bn0 = nn.BatchNorm2d(32, momentum=BN_MOMENTUM)
344
+ self.relu0 = nn.ReLU(inplace=relu_inplace)
345
+ self.conv1 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False)
346
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
347
+ self.relu1 = nn.ReLU(inplace=relu_inplace)
348
+ self.inplanes = 64
349
+ self.layer1 = self._make_layer(
350
+ block, branch_chans[0], bnum=layers[0], stride=strides[0]
351
+ )
352
+ self.layer2 = self._make_layer(
353
+ block, branch_chans[1], bnum=layers[1], stride=strides[1]
354
+ )
355
+ self.layer3 = self._make_layer(
356
+ block, branch_chans[2], bnum=layers[2], stride=strides[2]
357
+ )
358
+ self.layer4 = self._make_layer(
359
+ block, branch_chans[3], bnum=layers[3], stride=strides[3]
360
+ )
361
+ self.out_channels = [x * block.expansion for x in branch_chans]
362
+
363
+ def _make_layer(self, block, out_chan, bnum, stride=1):
364
+ layers = [block(self.inplanes, out_chan, stride=stride)]
365
+ self.inplanes = out_chan * block.expansion
366
+ for i in range(bnum - 1):
367
+ layers.append(block(self.inplanes, out_chan, stride=1))
368
+ return nn.Sequential(*layers)
369
+
370
+ def forward(self, x):
371
+ x = self.conv0(x)
372
+ x = self.relu0(self.bn0(x))
373
+ x = self.relu1(self.bn1(self.conv1(x)))
374
+
375
+ feat4 = self.layer1(x)
376
+ feat8 = self.layer2(feat4) # 1/8
377
+ feat16 = self.layer3(feat8) # 1/16
378
+ feat32 = self.layer4(feat16) # 1/32
379
+ return feat4, feat8, feat16, feat32
380
+
381
+ def load_weights(self, pretrained_path=None):
382
+ if not pretrained_path:
383
+ pretrained_path = self.pretrained_path
384
+ if self.pretrained_path or pretrained_path:
385
+ pretrained_dict = torch.load(
386
+ pretrained_path, map_location={"cuda:0": "cpu"}
387
+ )
388
+ print(f"Loading backbone weights from {pretrained_path} with strict=False")
389
+ print(f"Caution!! Things could silently fail here")
390
+ self.load_state_dict(pretrained_dict, strict=False)
391
+ else:
392
+ print("No backbone weights loaded")
393
+
394
+
395
+ ##########################################################################################
396
+ ##### Slim ResNet Instantiations
397
+ ##### The versions marked with _D are not trained on ImageNet, and use the weights from
398
+ ##### the respective models without a _D in the name
399
+ ##########################################################################################
400
+
401
+
402
+ def Resnet22S_D(**kwargs):
403
+ model = ResNetS(BasicBlock, [2, 3, 3, 2], [2, 2, 2, 2])
404
+ return model
405
+
406
+
407
+ def Resnet22S(**kwargs):
408
+ model = ResNetS(BasicBlock, [2, 3, 3, 2], [1, 2, 2, 2])
409
+ return model
410
+
411
+
412
+ def Resnet30S_D(**kwargs):
413
+ model = ResNetS(BasicBlock, [3, 4, 4, 3], [2, 2, 2, 2])
414
+ return model
415
+
416
+
417
+ def Resnet30S(**kwargs):
418
+ model = ResNetS(BasicBlock, [3, 4, 4, 3], [1, 2, 2, 2])
419
+ return model
420
+
421
+
422
+ def Resnet40S_D(**kwargs):
423
+ model = ResNetS(BasicBlock, [4, 5, 6, 4], [2, 2, 2, 2])
424
+ return model
425
+
426
+
427
+ def Resnet40S(**kwargs):
428
+ model = ResNetS(BasicBlock, [4, 5, 6, 4], [1, 2, 2, 2])
429
+ return model
430
+
431
+
432
+ def Resnet54S_D(**kwargs):
433
+ model = ResNetS(BasicBlock, [5, 8, 8, 5], [2, 2, 2, 2])
434
+ return model
435
+
436
+
437
+ def Resnet54S(**kwargs):
438
+ model = ResNetS(BasicBlock, [5, 8, 8, 5], [1, 2, 2, 2])
439
+ return model
440
+
441
+
442
+ def Resnet78S_D(**kwargs):
443
+ model = ResNetS(BasicBlock, [6, 12, 12, 8], [2, 2, 2, 2])
444
+ return model
445
+
446
+
447
+ def Resnet78S(**kwargs):
448
+ model = ResNetS(BasicBlock, [6, 12, 12, 8], [1, 2, 2, 2])
449
+ return model
450
+
451
+
452
+ def Resnet86S_D(**kwargs):
453
+ model = ResNetS(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2])
454
+ return model
455
+
456
+
457
+ def Resnet86S(**kwargs):
458
+ model = ResNetS(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2])
459
+ return model
460
+
461
+
462
+ def Resnet150S_D(**kwargs):
463
+ model = ResNetS(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2])
464
+ return model
465
+
466
+
467
+ def Resnet150S(**kwargs):
468
+ model = ResNetS(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2])
469
+ return model
470
+
471
+
472
+ ##########################################################################################
473
+ ##### 3 Stage ResNets
474
+ ##########################################################################################
475
+
476
+
477
+ class ResNetNarrow(nn.Module):
478
+ def __init__(
479
+ self,
480
+ block,
481
+ layers,
482
+ strides,
483
+ pretrained_path=None,
484
+ branch_chans=[64, 96, 160, 320],
485
+ ):
486
+ super(ResNetNarrow, self).__init__()
487
+ self.pretrained_path = pretrained_path
488
+ # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
489
+ self.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
490
+ self.bn0 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
491
+ self.relu0 = nn.ReLU(inplace=relu_inplace)
492
+ self.conv1 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
493
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
494
+ self.relu1 = nn.ReLU(inplace=relu_inplace)
495
+ self.conv2 = nn.Conv2d(
496
+ 64, branch_chans[0], kernel_size=3, stride=1, padding=1, bias=False
497
+ )
498
+ self.bn2 = nn.BatchNorm2d(branch_chans[0], momentum=BN_MOMENTUM)
499
+ self.relu2 = nn.ReLU(inplace=relu_inplace)
500
+ self.inplanes = branch_chans[0]
501
+ self.layer1 = self._make_layer(
502
+ block, branch_chans[1], bnum=layers[0], stride=strides[0]
503
+ )
504
+ self.layer2 = self._make_layer(
505
+ block, branch_chans[2], bnum=layers[1], stride=strides[1]
506
+ )
507
+ self.layer3 = self._make_layer(
508
+ block, branch_chans[3], bnum=layers[2], stride=strides[2]
509
+ )
510
+ # Always load weights, and re-init from scratch if pre-trained is not specified. A little costly, but less messy
511
+ # self.apply(seg_model_weight_initializer) #For layers not present in the snapshot ??
512
+ # self.load_weights(pretrained_path)
513
+ # branch_chans = [64, 96, 160, 320]
514
+ self.out_channels = [x * block.expansion for x in branch_chans]
515
+
516
+ def _make_layer(self, block, out_chan, bnum, stride=1):
517
+ layers = [block(self.inplanes, out_chan, stride=stride)]
518
+ self.inplanes = out_chan * block.expansion
519
+ for i in range(bnum - 1):
520
+ layers.append(block(self.inplanes, out_chan, stride=1))
521
+ return nn.Sequential(*layers)
522
+
523
+ def forward(self, x):
524
+ x = self.conv0(x)
525
+ x = self.relu0(self.bn0(x))
526
+ x = self.relu1(self.bn1(self.conv1(x)))
527
+ feat4 = self.relu2(self.bn2(self.conv2(x)))
528
+
529
+ feat8 = self.layer1(feat4) # 1/8
530
+ feat16 = self.layer2(feat8) # 1/16
531
+ feat32 = self.layer3(feat16) # 1/32
532
+ return feat4, feat8, feat16, feat32
533
+
534
+ def load_weights(self, pretrained_path=None):
535
+ if not pretrained_path:
536
+ pretrained_path = self.pretrained_path
537
+ if self.pretrained_path or pretrained_path:
538
+ pretrained_dict = torch.load(
539
+ pretrained_path, map_location={"cuda:0": "cpu"}
540
+ )
541
+ print(f"Loading backbone weights from {pretrained_path} with strict=False")
542
+ print(f"Caution!! Things could silently fail here")
543
+ self.load_state_dict(pretrained_dict, strict=False)
544
+ else:
545
+ print("No backbone weights loaded")
546
+
547
+
548
+ ##########################################################################################
549
+ ##### 3 Stage ResNet Instantiations
550
+ ##### These backbones do not differ between imagenet and cityscapes
551
+ ##########################################################################################
552
+
553
+
554
+ def Resnet122N(**kwargs):
555
+ model = ResNetNarrow(
556
+ BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 96, 160, 320]
557
+ )
558
+ return model
559
+
560
+
561
+ def Resnet74N(**kwargs):
562
+ model = ResNetNarrow(
563
+ BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 96, 160, 320]
564
+ )
565
+ return model
566
+
567
+
568
+ def Resnet46N(**kwargs):
569
+ model = ResNetNarrow(
570
+ BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 96, 160, 320]
571
+ )
572
+ return model
573
+
574
+
575
+ def Resnet122NS(**kwargs):
576
+ model = ResNetNarrow(
577
+ BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 64, 128, 256]
578
+ )
579
+ return model
580
+
581
+
582
+ def Resnet74NS(**kwargs):
583
+ model = ResNetNarrow(
584
+ BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 64, 128, 256]
585
+ )
586
+ return model
587
+
588
+
589
+ def Resnet46NS(**kwargs):
590
+ model = ResNetNarrow(
591
+ BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 64, 128, 256]
592
+ )
593
+ return model
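To make the backbone's multi-scale output contract concrete, a small sketch; the backbone choice and input size are illustrative and no pretrained weights are required.

```python
# Prints the four progressively downsampled feature maps and the declared channels.
import torch

from models.resnet import Resnet54S

backbone = Resnet54S()
feats = backbone(torch.randn(1, 3, 512, 1024))  # NCHW
for feat, chans in zip(feats, backbone.out_channels):
    print(tuple(feat.shape), "declared channels:", chans)
# With this stride pattern the maps sit at roughly 1/4, 1/8, 1/16 and 1/32 scale.
```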
model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py ADDED
@@ -0,0 +1,38 @@
+ # Copyright (c) 2022 Qualcomm Technologies, Inc.
+ # All Rights Reserved.
+
+ import torch
+ from torch import nn
+ from torch.nn import init
+ import numpy as np
+
+
+ def dense_kernel_initializer(tensor):
+     _, fan_out = nn.init._calculate_fan_in_and_fan_out(tensor)
+     init_range = 1.0 / np.sqrt(fan_out)
+
+     return nn.init.uniform_(tensor, a=-init_range, b=init_range)
+
+
+ def model_weight_initializer(m):
+     """
+     Usage:
+         model = Model()
+         model.apply(model_weight_initializer)
+     """
+     if isinstance(m, nn.Conv2d):
+         # Yes, this non-fancy init is on purpose,
+         # and seems to work better in practice for segmentation
+         if hasattr(m, "weight"):
+             nn.init.normal_(m.weight, std=0.01)
+         if m.bias is not None:
+             nn.init.constant_(m.bias, 0.0001)
+
+     elif isinstance(m, nn.BatchNorm2d):
+         nn.init.constant_(m.weight, 1)
+         nn.init.constant_(m.bias, 0)
+
+     elif isinstance(m, nn.Linear):
+         dense_kernel_initializer(m.weight.data)
+         if m.bias is not None:
+             nn.init.zeros_(m.bias.data)
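As the docstring indicates, the initializer is applied recursively via `nn.Module.apply`; a tiny sketch:

```python
# Applies the segmentation-friendly init above to every Conv2d/BatchNorm2d/Linear.
import torch.nn as nn

from models.utils import model_weight_initializer

net = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU())
net.apply(model_weight_initializer)
print(float(net[0].weight.std()))  # should be close to the configured std of 0.01
```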
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
+ ## Model Information
+ ### Source model
+ - Input shape: 1x3x512x1024
+ - Number of parameters: 18.04M
+ - Model size: 69.4MB
+ - Output shape: 1x19x64x128
+
+ Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
+
+ ### Converted model
+
+ - Precision: FP16
+ - Backend: QNN2.16
+ - Target Device: SNM972 QCS8550
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+
+ - Install AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Download the QNN version that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ ```
+
+ - Verify AidLite SDK
+
+ ```bash
+ # aidlite sdk c++ check
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
+
+ # aidlite sdk python check
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run demo
+ #### python
+ ```bash
+ cd python
+ python3 demo_qnn.py
+ ```
+
+ #### c++
+ ```bash
+ cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp
+ mkdir build && cd build
+ cmake ..
+ make
+ ./run_test
+ ```
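The converted model returns raw logits of shape 1x19x64x128; the sketch below shows one hedged way to turn them into a full-resolution class map (variable names are illustrative, this is not the exact `demo_qnn.py` code).

```python
# Post-processing sketch for the 1x19x64x128 output listed above.
import cv2
import numpy as np

logits = np.random.randn(1, 19, 64, 128).astype(np.float32)  # stand-in for the model output
class_map = logits[0].argmax(axis=0).astype(np.uint8)         # 64x128 per-pixel class ids
# Resize back to the 1024x512 network input size for visualization / colorization.
class_map_full = cv2.resize(class_map, (1024, 512), interpolation=cv2.INTER_NEAREST)
print(class_map_full.shape)  # (512, 1024)
```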
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
+ cmake_minimum_required (VERSION 3.5)
+ project("run_test")
+
+ find_package(OpenCV REQUIRED)
+
+ message(STATUS "OpenCV library status:")
+ message(STATUS ">  version: ${OpenCV_VERSION}")
+ message(STATUS ">  include: ${OpenCV_INCLUDE_DIRS}")
+
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
+
+ include_directories(
+     /usr/local/include
+     /usr/include/opencv4
+ )
+
+ link_directories(
+     /usr/local/lib/
+ )
+
+ file(GLOB SRC_LISTS
+     ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
+ )
+
+ add_executable(run_test ${SRC_LISTS})
+
+ target_link_libraries(run_test
+     aidlite
+     ${OpenCV_LIBS}
+     pthread
+ )
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,365 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Visualization palette for the 19 Cityscapes classes (arbitrary colors, indexed by train ID)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=road
21
+ {128, 0, 0}, // 1=sidewalk
22
+ {0, 128, 0}, // 2=building
23
+ {128, 128, 0}, // 3=wall
24
+ {0, 0, 128}, // 4=fence
25
+ {128, 0, 128}, // 5=pole
26
+ {0, 128, 128}, // 6=traffic light
27
+ {128, 128, 128}, // 7=traffic sign
28
+ {64, 0, 0}, // 8=vegetation
29
+ {192, 0, 0}, // 9=terrain
30
+ {64, 128, 0}, // 10=sky
31
+ {192, 128, 0}, // 11=person
32
+ {64, 0, 128}, // 12=rider
33
+ {192, 0, 128}, // 13=car
34
+ {64, 128, 128}, // 14=truck
35
+ {192, 128, 128}, // 15=bus
36
+ {0, 64, 0}, // 16=train
37
+ {128, 64, 0}, // 17=motorcycle
38
+ {0, 192, 0}, // 18=bicycle
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout: NCHW; with n=1 it reduces to CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Per-pixel argmax over classes
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode the label map into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation to the target resolution
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: get the per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to an RGB segmentation map
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Step 5: convert color channels (RGB -> BGR) so cv::imwrite stores the expected colors
244
+ if (image.channels() == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ delete[] format_data;
+ return resized_cubic;
250
+ }
251
+
252
+
253
+ int invoke(const Args& args) {
254
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
255
+ << "Image Path: " << args.imgs << "\n"
256
+ << "Inference Nums: " << args.invoke_nums << "\n"
257
+ << "Model Type: " << args.model_type << "\n";
258
+ Model* model = Model::create_instance(args.target_model);
259
+ if(model == nullptr){
260
+ printf("Create model failed !\n");
261
+ return EXIT_FAILURE;
262
+ }
263
+ Config* config = Config::create_instance();
264
+ if(config == nullptr){
265
+ printf("Create config failed !\n");
266
+ return EXIT_FAILURE;
267
+ }
268
+ config->implement_type = ImplementType::TYPE_LOCAL;
269
+ std::string model_type_lower = to_lower(args.model_type);
270
+ if (model_type_lower == "qnn"){
271
+ config->framework_type = FrameworkType::TYPE_QNN;
272
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
273
+ config->framework_type = FrameworkType::TYPE_SNPE2;
274
+ }
275
+ config->accelerate_type = AccelerateType::TYPE_DSP;
276
+ config->is_quantify_model = 1;
277
+
278
+ unsigned int model_h = 512;
279
+ unsigned int model_w = 1024;
280
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,3,model_h,model_w}};
281
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
282
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
283
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
284
+ if(fast_interpreter == nullptr){
285
+ printf("build_interpretper_from_model_and_config failed !\n");
286
+ return EXIT_FAILURE;
287
+ }
288
+ int result = fast_interpreter->init();
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->init() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+ // load model
294
+ result = fast_interpreter->load_model();
295
+ if(result != EXIT_SUCCESS){
296
+ printf("interpreter->load_model() failed !\n");
297
+ return EXIT_FAILURE;
298
+ }
299
+ printf("detect model load success!\n");
300
+
301
+ cv::Mat frame = cv::imread(args.imgs);
302
+ if (frame.empty()) {
303
+ printf("detect image load failed!\n");
304
+ return 1;
305
+ }
306
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
307
+ cv::Mat input_data;
308
+ cv::Mat frame_clone = frame.clone();
309
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
310
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
311
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
312
+ frame_clone.convertTo(input_data, CV_32F);
313
+ cv::subtract(input_data, means_scale, input_data);
314
+ cv::divide(input_data, stds_scale, input_data);
315
+
316
+ float *outdata0 = nullptr;
317
+ std::vector<float> invoke_time;
318
+ for (int i = 0; i < args.invoke_nums; ++i) {
319
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
320
+ if(result != EXIT_SUCCESS){
321
+ printf("interpreter->set_input_tensor() failed !\n");
322
+ return EXIT_FAILURE;
323
+ }
324
+ auto t1 = std::chrono::high_resolution_clock::now();
325
+ result = fast_interpreter->invoke();
326
+ auto t2 = std::chrono::high_resolution_clock::now();
327
+ std::chrono::duration<double> cost_time = t2 - t1;
328
+ invoke_time.push_back(cost_time.count() * 1000);
329
+ if(result != EXIT_SUCCESS){
330
+ printf("interpreter->invoke() failed !\n");
331
+ return EXIT_FAILURE;
332
+ }
333
+ uint32_t out_data_0 = 0;
334
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
335
+ if(result != EXIT_SUCCESS){
336
+ printf("interpreter->get_output_tensor() 1 failed !\n");
337
+ return EXIT_FAILURE;
338
+ }
339
+
340
+ }
341
+
342
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
343
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
344
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
345
+ float var_invoketime = 0.0f;
346
+ for (auto time : invoke_time) {
347
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
348
+ }
349
+ var_invoketime /= args.invoke_nums;
350
+ printf("=======================================\n");
351
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
352
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
353
+ printf("=======================================\n");
354
+
355
+ cv::Mat img = post_process(frame, outdata0);
356
+ cv::imwrite("./results.png", img);
357
+ fast_interpreter->destory();
358
+ return 0;
359
+ }
360
+
361
+
362
+ int main(int argc, char* argv[]) {
363
+ Args args = parse_args(argc, argv);
364
+ return invoke(args);
365
+ }
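
For reference, the post-processing chain implemented above (transpose to CHW, bilinear resize, per-pixel softmax, argmax, palette lookup) can be expressed compactly in NumPy. This is a sketch under the assumption that the raw output tensor has shape 1x64x128x19 (NHWC), as declared in `invoke()`; it is not part of the shipped sources.

```python
import numpy as np
import cv2

def postprocess(logits_nhwc: np.ndarray, palette: np.ndarray) -> np.ndarray:
    """logits_nhwc: 1x64x128x19 raw model output; palette: 19x3 uint8 color table."""
    logits = logits_nhwc[0].astype(np.float32)        # 64x128x19
    logits -= logits.max(axis=-1, keepdims=True)      # numerical stability
    probs = np.exp(logits)
    probs /= probs.sum(axis=-1, keepdims=True)        # softmax over classes
    pred = probs.argmax(axis=-1).astype(np.uint8)     # 64x128 class map
    rgb = palette[pred]                               # 64x128x3 color image
    # Upscale to the original 1024x512 resolution, as the C++ demo does.
    return cv2.resize(rgb, (1024, 512), interpolation=cv2.INTER_CUBIC)
```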
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e67a07dc0454b16d3363f2b0c92dcc87a10e3dc895fc1571b33bc2df53e3a81
3
+ size 36449096
model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB