diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..65537aa24e3a4532458b7f13fde833a3c950096b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..deefaf184e222d56a045f53fc0fa74709f72f4fd --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: INT8 +- Backend: QNN2.16 +- Target Device: FV01 QCS6490 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp +mkdir build && cd build +cmake .. 
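+# cmake configures the project (CMakeLists.txt expects OpenCV and the AidLite library to be available) and make builds the run_test binary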
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..54c6d91879d41f2f63d566935d580525e53ed9b2 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + const int 
src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..8decb11af15dd8a6dda346fbb82733b3de6392fc --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce910deb26ab4b1c9fb1c77e37b12b913473b18ac59c9ca0b45d65f212292d2 +size 18336944 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b42dab13ce7ea1acbf84d8a68c95a69de7978597 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + 
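+        # Create the AidLite Config: local execution, QNN backend, DSP acceleration; is_quantify_model = 1 marks the quantized context binary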
self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] 
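+# NHWC input of shape (1, 512, 1024, 3); keep H and W so the 64x128 output logits can be upsampled back to the input resolution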
+input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() + diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fa4aa2c5c8ef141951e2bd0ff49e3117e22e9fac --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: W8A16 +- Backend: QNN2.16 +- Target Device: FV01 QCS6490 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp +mkdir build && cd build +cmake .. 
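+# Configure with CMake and build run_test (requires OpenCV and the AidLite SDK to be installed)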
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98864a05827949b978ded878584179bcf01aff06 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + 
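+                // Offset of channel c in the CHW buffer; the four neighbouring pixels are blended with bilinear weights below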
const int src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec96acf48de9aaef2c70540355cea62d6590da50 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3065b3055672fb4f52f561a8ffb6ccb03e501480335f2f5f97d8cfaa6f0a4c +size 72810122 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth new file mode 100644 index 0000000000000000000000000000000000000000..608206844a0e46b19be5285c981dbd3ad1fd78c8 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597f83804cb9866c784b3d99209ee9e3b8b1f0b4f838c022a934ae5726f58218 +size 72423358 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..5d57eb060f4116abfc81cd9dc31da5e7859c04f7 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab3604cdedd3dc8ff34698bd15a197690df0511eae6e4856da89187fe7d17f1 +size 18537648 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f424b19daac188ea0fc2d01c69779b63e83ebe28 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py 
b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py new file mode 100644 index 0000000000000000000000000000000000000000..302d9271c70a34b09f7e19638bdba331702306ec --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py @@ -0,0 +1,11 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +imagenet_base_path = None +cityscapes_base_path = None +model_weights_base_path = None + +CITYSCAPES_MEAN = [0.485, 0.456, 0.406] +CITYSCAPES_STD = [0.229, 0.224, 0.225] +CITYSCAPES_NUM_CLASSES = 19 +CITYSCAPES_IGNORE_LABEL = 255 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..d5d71ad2e78f139c1f1923b2fa683ffcb336833e --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + 
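+        # Benchmark loop: re-set the input and invoke the interpreter invoke_nums times, recording each latency in milliseconds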
invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] +input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() + diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py new file mode 100644 index 0000000000000000000000000000000000000000..996a019876ecf7f555cf69b554d79ba8412a8a4c --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py @@ -0,0 +1,44 @@ +import numpy as np +import torch +import os +import sys +from typing import Callable, Tuple +from models import resnet +from models.ffnet_blocks import create_ffnet +torch.set_grad_enabled(False) + + + +def segmentation_ffnet54S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_dBBB_mobile", + backbone=resnet.Resnet54S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth"), + strict_loading=True, + ) + + + + +ffnet54_dbbb = segmentation_ffnet54S_dBBB_mobile() + + + + +num_params = sum(p.numel() for p in ffnet54_dbbb.parameters() if p.requires_grad) +print(f'Number of palm_detector parameters: {num_params}') + + +ffnet_seg = ffnet54_dbbb + +seg_d_in = torch.randn(1, 3, 512, 1024,dtype= torch.float32) + + +source_model = torch.jit.trace(ffnet_seg,seg_d_in) +source_model.save("ffnet54S_dBBB_cityscapes_state_dict_quarts.pt") +print("export pose detect ok!") + diff --git 
a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6c19c7fe6d14325c574f2516229be6e2ed44c122 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +from .ffnet_S_mobile import * +from .ffnet_NS_mobile import * +from .ffnet_gpu_large import * +from .ffnet_S_gpu_large import * +from .ffnet_N_gpu_large import * +from .ffnet_gpu_small import * +from .ffnet_S_gpu_small import * diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3a2c11dd4f0ce92176c55dc137e87afa5866e4d Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fef455bbf751c8a1c3cd11a387cea3a3f64d9ab Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f800e94f0d1f8afdf5edf4936e73c49dd3255d3 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..335bcecbfb1794e8bfccf46f3afe9e4498a792d5 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b5080f2b50bbf837c6163ccb844a0289f6b1e84 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41ea5f5eda0f99489b0d41d86df8ac7b49ea1704 Binary files /dev/null and 
b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72deade5c33775c901e7f79ee00de77a405d6df1 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3670e44f49a3d4bf088020f2fdc8c65a499ea7b Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74fdebcbb433f11b9cab28586f770ac759d34195 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40260528fbcc60f828b8a4254e21285668cc0ead Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51d52047f511bc812cdb1aa1cacc8240e19d48a8 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cb683094e22fabeca5cf2ccb7a50a5572a8ac8c Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..16d39c8d1043838c3e60b9c5ff67db2e1c72d88c --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py @@ -0,0 +1,318 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
+ +import os +from functools import partial + +import torch + + +from models.utils import model_weight_initializer +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 3-Stage Mobile FFNets trained for 1024x512 images, outputing segmentation maps of +##### 256x128 pixels. These models are intended for use with the +##### cityscapes evaluation script, which uses image sizes of 2048x1024 +########################################################################################## +@register_model +def segmentation_ffnet122NS_CBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122NS_CBB_mobile_pre_down", + backbone=resnet.Resnet122NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet74NS_CBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet74NS_CBB_mobile_pre_down", + backbone=resnet.Resnet74NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet46NS_CBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet46NS_CBB_mobile_pre_down", + backbone=resnet.Resnet46NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet122NS_CCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet122NS_CCC_mobile_pre_down", + backbone=resnet.Resnet122NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet74NS_CCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet74NS_CCC_mobile_pre_down", + backbone=resnet.Resnet74NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet46NS_CCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet46NS_CCC_mobile_pre_down", + 
backbone=resnet.Resnet46NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### The **actual** 3-Stage Mobile FFNets to export / use with 1024x512 images directly, +##### and output a segmentation map of 256x128 pixels +########################################################################################## +# +@register_model +def segmentation_ffnet122NS_CBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122NS_CBB_mobile", + backbone=resnet.Resnet122NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet74NS_CBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet74NS_CBB_mobile", + backbone=resnet.Resnet74NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet46NS_CBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet46NS_CBB_mobile", + backbone=resnet.Resnet46NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet122NS_CCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet122NS_CCC_mobile", + backbone=resnet.Resnet122NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet74NS_CCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet74NS_CCC_mobile", + backbone=resnet.Resnet74NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet46NS_CCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet46NS_CCC_mobile", + backbone=resnet.Resnet46NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + 
"ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. +##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet122NS_CBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet122NS_CBX_mobile", + backbone=resnet.Resnet122NS, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet74NS_CBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet74NS_CBX_mobile", + backbone=resnet.Resnet74NS, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet46NS_CBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet46NS_CBX_mobile", + backbone=resnet.Resnet46NS, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +########################################################################################## +##### This is an example of how these FFNet models, which are intended for 1024x512 images +##### would be initialized for training on cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet122NS_CBB_mobile_pre_down_train(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122NS_CBB_mobile_pre_down", + backbone=resnet.Resnet122NS, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, # Strict loading is false here because the weights are going into a model with pre_downsampling=True + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py new file mode 100644 index 0000000000000000000000000000000000000000..8883e4f3fe76c62332ef460df19bb1648ef37886 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py @@ -0,0 +1,157 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
+ +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 3-Stage GPU FFNets. These are trained for use with image sizes of 2048x1024 and +##### output segmentation maps of size 512x256 pixels +########################################################################################## +@register_model +def segmentation_ffnet122N_CBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122N_CBB", + backbone=resnet.Resnet122N, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122N/ffnet122N_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet74N_CBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet74N_CBB", + backbone=resnet.Resnet74N, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74N/ffnet74N_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet46N_CBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet46N_CBB", + backbone=resnet.Resnet46N, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46N/ffnet46N_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. 
+##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet122N_CBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet122N_CBX", + backbone=resnet.Resnet122N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet74N_CBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet74N_CBX", + backbone=resnet.Resnet74N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74N/ffnet74N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet46N_CBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet46N_CBX", + backbone=resnet.Resnet46N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46N/ffnet46N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet122N_CBB_train(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122N_CBB", + backbone=resnet.Resnet122N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py new file mode 100644 index 0000000000000000000000000000000000000000..940b9b705e7f0a69f24e0249fc1e21bd8c6164ba --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py @@ -0,0 +1,80 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with Slim backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 512x256 pixels +########################################################################################## +@register_model +def segmentation_ffnet150S_BBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_BBB", + backbone=resnet.Resnet150S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_BBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet86S_BBB_train(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py new file mode 100644 index 0000000000000000000000000000000000000000..12158368fae72b545d7b992d5e120c2019320a85 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py @@ -0,0 +1,119 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with Slim backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 256x128 pixels +########################################################################################## +@register_model +def segmentation_ffnet150S_dBBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_dBBB", + backbone=resnet.Resnet150S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_dBBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_dBBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_dBBB", + backbone=resnet.Resnet86S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_dBBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. +##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet150S_BBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet150S_BBX", + backbone=resnet.Resnet150S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet86S_BBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet86S_BBX", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet86S_dBBB_train(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_dBBB", + backbone=resnet.Resnet86S_D, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..2f240c543ce2c98f549bb2b28cc8c07cf1673230 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py @@ -0,0 +1,555 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
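+#
+# Module summary: this file registers the 4-stage Slim ("S") mobile FFNets
+# (ffnet40S/54S/78S/86S/150S): dBBB variants for full 2048x1024 frames, BBB/BCC variants
+# for 1024x512 inputs (with *_pre_down counterparts for the 2048x1024 Cityscapes
+# evaluation script), and BBX ImageNet classification initializations.
+#
+# Illustrative usage sketch: assumes the corresponding Cityscapes checkpoint exists under
+# model_weights_base_path, and that @register_model returns the decorated factory so it
+# can be imported directly.
+#
+#   import torch
+#   from models.ffnet_S_mobile import segmentation_ffnet54S_dBBB_mobile
+#
+#   model = segmentation_ffnet54S_dBBB_mobile()      # eval-mode nn.Module
+#   x = torch.randn(1, 3, 1024, 2048)                # NCHW batch, one 2048x1024 frame
+#   logits = model(x)                                # 19-class logits at 1/8 resolution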
+ +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage Mobile FFNets with Slim backbone. +##### These are trained for use with image sizes of 2048x1024, and output a segmentation map +##### of 256x128 pixels +########################################################################################## +@register_model +def segmentation_ffnet86S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_dBBB_mobile", + backbone=resnet.Resnet86S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet78S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet78S_dBBB_mobile", + backbone=resnet.Resnet78S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet54S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_dBBB_mobile", + backbone=resnet.Resnet54S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet40S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet40S_dBBB_mobile", + backbone=resnet.Resnet40S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### 4-Stage Mobile FFNets with Slim backbone, trained for use with image sizes of 1024x512 +##### and output a segmentation map of 256x128 pixels +##### These versions are meant for use with the cityscapes evaluation script, which provides +##### inputs at 2048x1024 +########################################################################################## +@register_model +def segmentation_ffnet150S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_BBB_mobile_pre_down", + backbone=resnet.Resnet150S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB_mobile_pre_down", + backbone=resnet.Resnet86S, + pre_downsampling=True, + 
pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet78S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet78S_BBB_mobile_pre_down", + backbone=resnet.Resnet78S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet54S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_BBB_mobile_pre_down", + backbone=resnet.Resnet54S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet40S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet40S_BBB_mobile_pre_down", + backbone=resnet.Resnet40S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet150S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150S_BCC_mobile_pre_down", + backbone=resnet.Resnet150S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet86S_BCC_mobile_pre_down", + backbone=resnet.Resnet86S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet78S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet78S_BCC_mobile_pre_down", + backbone=resnet.Resnet78S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet54S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet54S_BCC_mobile_pre_down", + backbone=resnet.Resnet54S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet40S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet40S_BCC_mobile_pre_down", + backbone=resnet.Resnet40S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth", + ), + 
strict_loading=True, + ) + + +########################################################################################## +##### 4-Stage Mobile FFNets with Slim backbone. +##### These are the actual models, trained for use with image sizes of 1024x512 +##### and output a segmentation map of 256x128 pixels +##### See the versions with _pre_down suffix for models to use with the cityscapes evaluation script +########################################################################################## +@register_model +def segmentation_ffnet150S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_BBB_mobile", + backbone=resnet.Resnet150S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet86S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB_mobile", + backbone=resnet.Resnet86S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet78S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet78S_BBB_mobile", + backbone=resnet.Resnet78S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet54S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_BBB_mobile", + backbone=resnet.Resnet54S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet40S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet40S_BBB_mobile", + backbone=resnet.Resnet40S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet150S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150S_BCC_mobile", + backbone=resnet.Resnet150S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + 
+@register_model +def segmentation_ffnet86S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet86S_BCC_mobile", + backbone=resnet.Resnet86S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet78S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet78S_BCC_mobile", + backbone=resnet.Resnet78S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet54S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet54S_BCC_mobile", + backbone=resnet.Resnet54S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet40S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet40S_BCC_mobile", + backbone=resnet.Resnet40S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. 
+##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet150S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet150S_BBX_mobile", + backbone=resnet.Resnet150S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet86S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet86S_BBX_mobile", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet78S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet78S_BBX_mobile", + backbone=resnet.Resnet78S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet54S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet54S_BBX_mobile", + backbone=resnet.Resnet54S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet40S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet40S_BBX_mobile", + backbone=resnet.Resnet40S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how the FFNet models intended for 1024x512 images +##### would be initialized for training on cityscapes with 2048x1024 images +##### Set up the rest accordingly +########################################################################################## +@register_model +def segmentation_ffnet78S_BCC_mobile_pre_down_train(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet78S_BCC_mobile_pre_down", + backbone=resnet.Resnet78S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, # Strict loading is false here because the weights are going into a model with pre_downsampling=True + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..69074f5d4c540bae853926215dfe1ad0c7ae472d --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py @@ -0,0 +1,663 @@ +# 
Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +######################################################################################### +# Part of the code in UpBranch adapted from https://github.com/feinanshan/FANet/blob/master/Testing/models/fanet/fanet.py +# +# The original source code was made available under the following license +# MIT License +# +# Copyright (c) 2021 Ping Hu +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +# Code for ClassificationHead adapted from https://github.com/HRNet/HRNet-Image-Classification + +# The original source code was made available under the following license +# MIT License +# Copyright (c) 2019 Microsoft Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +######################################################################################### + + +import math +import torch +from torch import nn +from torch.nn import functional as F +from models.utils import model_weight_initializer +import torchvision.transforms as T +from scipy import ndimage + +# The modules here currently assume that there are always 4 branches. 
+# It would need to be adapted in order to support a variable number of branches + +# TODO : Pass BN momentum through config +BN_MOMENTUM = 0.1 +gpu_up_kwargs = {"mode": "bilinear", "align_corners": True} +mobile_up_kwargs = {"mode": "nearest"} +relu_inplace = True + +# TODO : Replace functional interpolate operations with upsample modules + + +class ConvBNReLU(nn.Module): + def __init__( + self, + in_chan, + out_chan, + ks=3, + stride=1, + padding=1, + activation=nn.ReLU, + *args, + **kwargs, + ): + super(ConvBNReLU, self).__init__() + layers = [ + nn.Conv2d( + in_chan, + out_chan, + kernel_size=ks, + stride=stride, + padding=padding, + bias=False, + ), + nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM), + ] + if activation: + layers.append(activation(inplace=relu_inplace)) + self.layers = nn.Sequential(*layers) + + def forward(self, x): + return self.layers(x) + + +class AdapterConv(nn.Module): + def __init__( + self, in_channels=[256, 512, 1024, 2048], out_channels=[64, 128, 256, 512] + ): + super(AdapterConv, self).__init__() + assert len(in_channels) == len( + out_channels + ), "Number of input and output branches should match" + self.adapter_conv = nn.ModuleList() + + for k in range(len(in_channels)): + self.adapter_conv.append( + ConvBNReLU(in_channels[k], out_channels[k], ks=1, stride=1, padding=0), + ) + + def forward(self, x): + out = [] + for k in range(len(self.adapter_conv)): + out.append(self.adapter_conv[k](x[k])) + return out + + +class UpsampleCat(nn.Module): + def __init__(self, upsample_kwargs=gpu_up_kwargs): + super(UpsampleCat, self).__init__() + self._up_kwargs = upsample_kwargs + + def forward(self, x): + """Upsample and concatenate feature maps.""" + assert isinstance(x, list) or isinstance(x, tuple) + # print(self._up_kwargs) + x0 = x[0] + _, _, H, W = x0.size() + for i in range(1, len(x)): + x0 = torch.cat([x0, F.interpolate(x[i], (H, W), **self._up_kwargs)], dim=1) + return x0 + + +class UpBranch(nn.Module): + def __init__( + self, + in_channels=[64, 128, 256, 512], + out_channels=[128, 128, 128, 128], + upsample_kwargs=gpu_up_kwargs, + ): + super(UpBranch, self).__init__() + + self._up_kwargs = upsample_kwargs + + self.fam_32_sm = ConvBNReLU( + in_channels[3], out_channels[3], ks=3, stride=1, padding=1 + ) + self.fam_32_up = ConvBNReLU( + in_channels[3], in_channels[2], ks=1, stride=1, padding=0 + ) + self.fam_16_sm = ConvBNReLU( + in_channels[2], out_channels[2], ks=3, stride=1, padding=1 + ) + self.fam_16_up = ConvBNReLU( + in_channels[2], in_channels[1], ks=1, stride=1, padding=0 + ) + self.fam_8_sm = ConvBNReLU( + in_channels[1], out_channels[1], ks=3, stride=1, padding=1 + ) + self.fam_8_up = ConvBNReLU( + in_channels[1], in_channels[0], ks=1, stride=1, padding=0 + ) + self.fam_4 = ConvBNReLU( + in_channels[0], out_channels[0], ks=3, stride=1, padding=1 + ) + + self.high_level_ch = sum(out_channels) + self.out_channels = out_channels + + def forward(self, x): + + feat4, feat8, feat16, feat32 = x + + smfeat_32 = self.fam_32_sm(feat32) + upfeat_32 = self.fam_32_up(feat32) + + _, _, H, W = feat16.size() + x = F.interpolate(upfeat_32, (H, W), **self._up_kwargs) + feat16 + smfeat_16 = self.fam_16_sm(x) + upfeat_16 = self.fam_16_up(x) + + _, _, H, W = feat8.size() + x = F.interpolate(upfeat_16, (H, W), **self._up_kwargs) + feat8 + smfeat_8 = self.fam_8_sm(x) + upfeat_8 = self.fam_8_up(x) + + _, _, H, W = feat4.size() + smfeat_4 = self.fam_4( + F.interpolate(upfeat_8, (H, W), **self._up_kwargs) + feat4 + ) + + return smfeat_4, smfeat_8, smfeat_16, smfeat_32 + + 
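+# FFNetUpHead (below) assembles the head as a single nn.Sequential pipeline:
+#   AdapterConv (optional 1x1 conv+BN+ReLU blocks that remap the four branch widths)
+#   -> UpBranch (top-down fusion yielding four multi-scale feature maps)
+#   -> UpsampleCat (segmentation only: upsample every scale to the largest and concatenate)
+#   -> task head (SegmentationHead_NoSigmoid_1x1/_3x3 for segmentation,
+#      ClassificationHead for classification).
+# The head_type prefix (A/B/C) selects the channel widths, and the "_mobile" suffix swaps
+# bilinear upsampling for nearest-neighbour upsampling.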
+class FFNetUpHead(nn.Module): + def __init__( + self, + in_chans, + use_adapter_conv=True, + head_type="B_mobile", + task="segmentation_A", + num_classes=19, + base_chans=[64, 128, 256, 512], + dropout_rate=None, # Only used for classification + *args, + **kwargs, + ): + super(FFNetUpHead, self).__init__() + layers = [] + # base_chans = [64, 128, 128, 128] + if head_type.startswith("A"): + base_chans = [64, 128, 256, 512] + elif head_type.startswith("B"): + base_chans = [64, 128, 128, 256] + elif head_type.startswith("C"): + base_chans = [128, 128, 128, 128] + + if use_adapter_conv: + layers.append(AdapterConv(in_chans, base_chans)) + in_chans = base_chans[:] + + if head_type == "A": + layers.append(UpBranch(in_chans)) + elif head_type == "A_mobile": + layers.append(UpBranch(in_chans, upsample_kwargs=mobile_up_kwargs)) + elif head_type == "B": + layers.append(UpBranch(in_chans, [96, 96, 64, 32])) + elif head_type == "B_mobile": + layers.append( + UpBranch(in_chans, [96, 96, 64, 32], upsample_kwargs=mobile_up_kwargs) + ) + elif head_type == "C": + layers.append(UpBranch(in_chans, [128, 16, 16, 16])) + elif head_type == "C_mobile": + layers.append( + UpBranch(in_chans, [128, 16, 16, 16], upsample_kwargs=mobile_up_kwargs) + ) + else: + raise ValueError(f"Unknown FFNetUpHead type {head_type}") + + self.num_features = layers[-1].high_level_ch + self.num_multi_scale_features = layers[-1].out_channels + + if task.startswith("segmentation"): + if "mobile" in head_type: + layers.append(UpsampleCat(mobile_up_kwargs)) + else: + layers.append(UpsampleCat(gpu_up_kwargs)) + + # Gets single scale input + if "_C" in task: + mid_feat = 128 + layers.append( + SegmentationHead_NoSigmoid_1x1( + self.num_features, + mid_feat, + num_outputs=num_classes, + ) + ) + elif "_B" in task: + mid_feat = 256 + layers.append( + SegmentationHead_NoSigmoid_3x3( + self.num_features, + mid_feat, + num_outputs=num_classes, + ) + ) + elif "_A" in task: + mid_feat = 512 + layers.append( + SegmentationHead_NoSigmoid_1x1( + self.num_features, + mid_feat, + num_outputs=num_classes, + ) + ) + else: + raise ValueError(f"Unknown Segmentation Head {task}") + + elif task == "classification": + # Gets multi scale input + layers.append( + ClassificationHead( + self.num_multi_scale_features, + [128, 256, 512, 1024], + num_outputs=num_classes, + dropout_rate=dropout_rate, + ) + ) + self.layers = nn.Sequential(*layers) + + def forward(self, x): + return self.layers(x) + + +class SimpleBottleneckBlock(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super(SimpleBottleneckBlock, self).__init__() + bn_mom = 0.1 + bn_eps = 1e-5 + + self.downsample = None + if stride != 1 or inplanes != planes * self.expansion: + self.downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * self.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom), + ) + + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=bn_mom) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes, momentum=bn_mom) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + 
out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ClassificationHead(nn.Module): + def __init__( + self, + pre_head_channels, + head_channels=[128, 256, 512, 1024], + num_outputs=1, + dropout_rate=None, + ): + super(ClassificationHead, self).__init__() + + self.dropout_rate = dropout_rate + bn_mom = 0.1 + bn_eps = 1e-5 + head_block_type = SimpleBottleneckBlock + head_expansion = 4 + + expansion_layers = [] + for i, pre_head_channel in enumerate(pre_head_channels): + expansion_layer = head_block_type( + pre_head_channel, + int(head_channels[i] / head_expansion), + ) + expansion_layers.append(expansion_layer) + self.expansion_layers = nn.ModuleList(expansion_layers) + + # downsampling modules + downsampling_layers = [] + for i in range(len(pre_head_channels) - 1): + input_channels = head_channels[i] + output_channels = head_channels[i + 1] + + downsampling_layer = nn.Sequential( + nn.Conv2d( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=3, + stride=2, + padding=1, + ), + nn.BatchNorm2d(output_channels, momentum=bn_mom), + nn.ReLU(), + ) + + downsampling_layers.append(downsampling_layer) + self.downsampling_layers = nn.ModuleList(downsampling_layers) + + self.final_layer = nn.Sequential( + nn.Conv2d( + in_channels=head_channels[-1], + out_channels=2048, + kernel_size=1, + stride=1, + padding=0, + ), + nn.BatchNorm2d(2048, momentum=bn_mom), + nn.ReLU(inplace=True), + ) + + self.adaptive_avg_pool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear( + 2048, + num_outputs, + ) + + def forward(self, x): + + next_x = self.expansion_layers[0](x[0]) + for i in range(len(self.downsampling_layers)): + next_x = self.expansion_layers[i + 1](x[i + 1]) + self.downsampling_layers[ + i + ](next_x) + x = next_x + + x = self.final_layer(x) + x = self.adaptive_avg_pool(x).squeeze() + + if self.dropout_rate: + x = torch.nn.functional.dropout( + x, p=self._model_config.dropout_rate, training=self.training + ) + + x = self.classifier(x) + return x + + +class SegmentationHead_NoSigmoid_3x3(nn.Module): + def __init__( + self, backbone_channels, mid_channels=256, kernel_size=3, num_outputs=1 + ): + super(SegmentationHead_NoSigmoid_3x3, self).__init__() + last_inp_channels = backbone_channels + self.last_layer = nn.Sequential( + nn.Conv2d( + in_channels=last_inp_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=1, + padding=kernel_size // 2, + ), + nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM), + nn.ReLU(inplace=relu_inplace), + nn.Conv2d( + in_channels=mid_channels, + out_channels=num_outputs, + kernel_size=kernel_size, + stride=1, + padding=kernel_size // 2, + ), + ) + + def forward(self, x): + x = self.last_layer(x) + return x + + +class SegmentationHead_NoSigmoid_1x1(nn.Module): + def __init__( + self, backbone_channels, mid_channels=512, kernel_size=3, num_outputs=1 + ): + super(SegmentationHead_NoSigmoid_1x1, self).__init__() + last_inp_channels = backbone_channels + self.last_layer = nn.Sequential( + nn.Conv2d( + in_channels=last_inp_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=1, + padding=kernel_size // 2, + ), + nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM), + nn.ReLU(inplace=relu_inplace), + nn.Conv2d( + in_channels=mid_channels, + out_channels=num_outputs, + kernel_size=1, + stride=1, + padding=0, + ), + ) + + 
def forward(self, x): + x = self.last_layer(x) + return x + + +class GaussianConv2D(nn.Module): + """ + Gaussian smoothing + downsampling, applied independently per channel + THIS IS NOT MEANT FOR USE ON MOBILE. MIGHT BE HORRIBLY SLOW + """ + + def __init__(self, channels, kernel_size, sigma, stride=1): + super(GaussianConv2D, self).__init__() + assert isinstance( + kernel_size, int + ), "Specify kernel size as int. Both dimensions will get the same kernel size" + assert isinstance(sigma, float), "Specify sigma as float. Anisotropic gaussian" + + kernel = torch.zeros(kernel_size, kernel_size) + mean_loc = int((kernel_size - 1) / 2) # Because 0 indexed + kernel[mean_loc, mean_loc] = 1 + kernel = torch.from_numpy(ndimage.gaussian_filter(kernel.numpy(), sigma=sigma)) + + # Make a dwise conv out of the kernel + # Weights of shape out_channels, in_channels/groups, k, k + kernel = kernel.view(1, 1, kernel_size, kernel_size) + kernel = kernel.repeat(channels, 1, 1, 1) + + self.conv = F.conv2d + # Register the kernel buffer instead of as a parameter, so that the training doesn't + # happily update it + self.register_buffer("weight", kernel) + self.channels = channels + self.stride = stride + + def forward(self, input): + return self.conv( + input, weight=self.weight, groups=self.channels, stride=self.stride + ) + + +class FFNet(nn.Module): + def __init__( + self, + ffnet_head_type="A", + num_classes=19, + task="segmentation_A", + use_adapter_convs=True, + backbone=None, + pre_downsampling=False, + model_name="default", + dropout_rate=None, + **kwargs, + ): + super(FFNet, self).__init__() + self.backbone_model = backbone() + branch_chans = self.backbone_model.out_channels + self.use_adapter_convs = use_adapter_convs + self.ffnet_head_type = ffnet_head_type + self.task = task + self.head = FFNetUpHead( + branch_chans, + use_adapter_conv=use_adapter_convs, + head_type=ffnet_head_type, + num_classes=num_classes, + task=task, + dropout_rate=dropout_rate, + ) + self.model_name = model_name + # Pre-downsampling is used while training models that use 1024x512 image sizes rather than 2048x1024. 
+ self.pre_downsampling = pre_downsampling + if self.pre_downsampling: + self.smoothing = GaussianConv2D( + channels=3, kernel_size=5, sigma=0.7, stride=2 + ) + + def forward(self, x): + if self.pre_downsampling: + x = self.smooth_and_downsample_input(x) + x = self.backbone_model(x) + return self.head(x) + + def smooth_and_downsample_input(self, x): + x = F.pad(x, (0, 0, 1, 1), mode="reflect") + return self.smoothing(x) + + def init_model( + self, pretrained_path=None, strict_loading=True, backbone_only=False + ): + print(f"Initializing {self.model_name} weights") + self.apply(model_weight_initializer) + if pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + if backbone_only: + backbone_dict = {} + for k, v in pretrained_dict.items(): + if k.startswith("backbone_model"): + backbone_dict[k] = v + self.load_state_dict(backbone_dict, strict=strict_loading) + else: + self.load_state_dict(pretrained_dict, strict=strict_loading) + else: + self.backbone_model.load_weights() + + +def create_ffnet( + pretrained=True, + imagenet_backbone_pretrained=True, + pretrained_weights_path=None, + pretrained_backbone_only=False, + ffnet_head_type="A", + strict_loading=True, + num_classes=19, + task="segmentation_A", + model_name="ffnnet122NS_CCC", + backbone=None, + pre_downsampling=False, + dropout_rate=None, + **kwargs, +): + + if pretrained_weights_path: + model_wghts = pretrained_weights_path + pretrained = True + if imagenet_backbone_pretrained: + pretrained = True + + model = FFNet( + ffnet_head_type=ffnet_head_type, + num_classes=num_classes, + task=task, + use_adapter_convs=True, + backbone=backbone, + pre_downsampling=pre_downsampling, + model_name=model_name, + dropout_rate=dropout_rate, + ) + + model.apply(model_weight_initializer) + if pretrained: + if pretrained_weights_path: + print("Loading pretrained model state dict from {}".format(model_wghts)) + model.init_model( + model_wghts, + strict_loading=strict_loading, + backbone_only=pretrained_backbone_only, + ) + else: + print( + "No model weights provided, attempting to load imagenet pretrained backbone..." + ) + model.init_model() + + model.eval() + return model diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py new file mode 100644 index 0000000000000000000000000000000000000000..bbb492d8b48b91b150acf39d122d82cfaf3df9d6 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py @@ -0,0 +1,235 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with ResNet backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 512x256 pixels +########################################################################################## +@register_model +def segmentation_ffnet150_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet150_AAA", + backbone=resnet.Resnet150, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet134_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet134_AAA", + backbone=resnet.Resnet134, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet134/ffnet134_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet101_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet101_AAA", + backbone=resnet.Resnet101, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet101/ffnet101_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet86_AAA", + backbone=resnet.Resnet86, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet56_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet56_AAA", + backbone=resnet.Resnet56, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet50_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet50_AAA", + backbone=resnet.Resnet50, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet50/ffnet50_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet34_AAA", + backbone=resnet.Resnet34, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet150_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150_ABB", + backbone=resnet.Resnet150, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86_ABB", + backbone=resnet.Resnet86, + pre_downsampling=False, + 
pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet56_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet56_ABB", + backbone=resnet.Resnet56, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet34_ABB", + backbone=resnet.Resnet34, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet150_AAA_train(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet150_AAA", + backbone=resnet.Resnet150, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py new file mode 100644 index 0000000000000000000000000000000000000000..d26f776e4ece1d56369748b58e1dca5d6132ae6f --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py @@ -0,0 +1,385 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with ResNet backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 256x128 pixels +########################################################################################## +@register_model +def segmentation_ffnet150_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet150_dAAA", + backbone=resnet.Resnet150_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet134_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet134_dAAA", + backbone=resnet.Resnet134_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet134/ffnet134_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet101_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet101_dAAA", + backbone=resnet.Resnet101_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet101/ffnet101_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet86_dAAA", + backbone=resnet.Resnet86_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet56_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet56_dAAA", + backbone=resnet.Resnet56_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet50_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet50_dAAA", + backbone=resnet.Resnet50_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet50/ffnet50_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet34_dAAA", + backbone=resnet.Resnet34_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet18_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet18_dAAA", + backbone=resnet.Resnet18_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet18/ffnet18_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet150_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150_dAAC", + backbone=resnet.Resnet150_D, + 
pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet86_dAAC", + backbone=resnet.Resnet86_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet34_dAAC", + backbone=resnet.Resnet34_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet18_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet18_dAAC", + backbone=resnet.Resnet18_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet18/ffnet18_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. +##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet150_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet150_AAX", + backbone=resnet.Resnet150, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet134_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet134_AAX", + backbone=resnet.Resnet134, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet134/ffnet134_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet101_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet101_AAX", + backbone=resnet.Resnet101, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet101/ffnet101_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet86_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet86_AAX", + backbone=resnet.Resnet86, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet56_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet56_AAX", + backbone=resnet.Resnet56, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + 
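+# Illustrative usage sketch (relies only on models/model_registry.py from this
+# repository): each @register_model factory in this file is registered under its
+# function name and can be built with no arguments, e.g.
+#
+#     from models.model_registry import model_entrypoint
+#     model = model_entrypoint("classification_ffnet56_AAX")()
+#
+# model_entrypoint() returns the factory itself, so calling it runs create_ffnet
+# exactly as written in the definitions above.
+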
+@register_model +def classification_ffnet50_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet50_AAX", + backbone=resnet.Resnet50, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet50/ffnet50_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet34_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet34_AAX", + backbone=resnet.Resnet34, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet18_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet18_AAX", + backbone=resnet.Resnet18, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet18/ffnet18_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet150_dAAC_train(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150_dAAC", + backbone=resnet.Resnet150_D, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py new file mode 100644 index 0000000000000000000000000000000000000000..075db6e7706c995ef14c515e6fd2071259933213 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py @@ -0,0 +1,32 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import sys + +_model_entrypoints = {} + + +def register_model(fn): + # lookup containing module + mod = sys.modules[fn.__module__] + # add model to __all__ in module + model_name = fn.__name__ + if hasattr(mod, "__all__"): + mod.__all__.append(model_name) + else: + mod.__all__ = [model_name] + + # add entries to registry dict/sets + _model_entrypoints[model_name] = fn + return fn + + +def model_entrypoint(model_name): + """Fetch a model entrypoint for specified model name""" + if model_name in _model_entrypoints: + return _model_entrypoints[model_name] + else: + raise RuntimeError( + f"Unknown model ({model_name}); known models are: " + f"{_model_entrypoints.keys()}" + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..0e9cb8aee66ba476462092baf7d80e9564dc6ea3 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py @@ -0,0 +1,593 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
+ +######################################################################### +# Code adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py + +# The original source code was made available under the following license +# BSD 3-Clause License +# +# Copyright (c) Soumith Chintala 2016, +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +######################################################################### +#### **The main takeaway is that simple FFNets made out of resnet backbones made using basic-block +#### **are just as competitive as complex architectures such as HRNet, DDRNet, FANet etc. + +#### New and old ResNet backbones, designed for use with FFNet. These do not have a classification +#### head attached here. ImageNet training of these backbones is done as an FFNet with a classification +#### head attached. See ffnet.py and ffnet_blocks.py. +#### Also, these models do not make a distinction between GPU and mobile because the elements that we change +#### between the two are among the additional modules that FFNet adds. +######################################################################### +import torch + +#### These are weights for the backbone when trained directly with a classification head attached at the end of the +#### backbone, and not as part of the FFNet structure. 
For a minor training accuracy advantage, one could use these +#### weights as the initialization for the relevant models in the new family of models, +#### but training from scratch works nearly equally well +model_paths = { + "resnet18": "/pretrained_weights/resnet18.pth", + "resnet34": "/pretrained_weights/resnet34.pth", + "resnet50": "/pretrained_weights/resnet50.pth", + "resnet101": "/pretrained_weights/resnet101.pth", +} + +import torch.nn as nn +import torch._utils + + +BN_MOMENTUM = 0.1 +relu_inplace = True + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_chan, out_chan, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_chan, out_chan, stride) + self.bn1 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM) + self.conv2 = conv3x3(out_chan, out_chan) + self.bn2 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=relu_inplace) + self.downsample = None + if in_chan != out_chan or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM), + ) + + def forward(self, x): + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out_ = shortcut + out + out_ = self.relu(out_) + return out_ + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_chan, out_chan, stride=1, base_width=64): + super(Bottleneck, self).__init__() + width = int(out_chan * (base_width / 64.0)) * 1 + self.conv1 = conv1x1(in_chan, width) + self.bn1 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM) + self.conv2 = conv3x3(width, width, stride) + self.bn2 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM) + self.conv3 = conv1x1(width, out_chan * self.expansion) + self.bn3 = nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=relu_inplace) + self.downsample = None + if in_chan != out_chan * self.expansion or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d( + in_chan, + out_chan * self.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM), + ) + + def forward(self, x): + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out_ = shortcut + out + out_ = self.relu(out_) + + return out_ + + +########################################################################################## +##### Vanilla ResNets, but with a more filled out model space, and primarily using basic blocks +########################################################################################## + + +class ResNet(nn.Module): + def __init__( + self, + block, + layers, + strides, + pretrained_path=None, + branch_chans=[64, 128, 256, 512], + ): + super(ResNet, self).__init__() + self.pretrained_path = pretrained_path + 
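+        # Stem: a 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool,
+        # giving layer1 a 1/4-resolution input.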
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=relu_inplace) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.inplanes = 64 + self.layer1 = self._make_layer( + block, branch_chans[0], bnum=layers[0], stride=strides[0] + ) + self.layer2 = self._make_layer( + block, branch_chans[1], bnum=layers[1], stride=strides[1] + ) + self.layer3 = self._make_layer( + block, branch_chans[2], bnum=layers[2], stride=strides[2] + ) + self.layer4 = self._make_layer( + block, branch_chans[3], bnum=layers[3], stride=strides[3] + ) + self.out_channels = [x * block.expansion for x in branch_chans] + + def _make_layer(self, block, out_chan, bnum, stride=1): + layers = [block(self.inplanes, out_chan, stride=stride)] + self.inplanes = out_chan * block.expansion + for i in range(bnum - 1): + layers.append(block(self.inplanes, out_chan, stride=1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(self.bn1(x)) + x = self.maxpool(x) + + feat4 = self.layer1(x) + feat8 = self.layer2(feat4) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat4, feat8, feat16, feat32 + + def load_weights(self, pretrained_path=None): + if not pretrained_path: + pretrained_path = self.pretrained_path + if self.pretrained_path or pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + print(f"Loading backbone weights from {pretrained_path} with strict=False") + print(f"Caution!! Things could silently fail here") + self.load_state_dict(pretrained_dict, strict=False) + else: + print("No backbone weights loaded") + + +########################################################################################## +##### Vanilla ResNet instantiations +##### The versions marked with _D are not trained on ImageNet, and use the weights from +##### the respective models without a _D in the name +########################################################################################## + + +def Resnet18_D(**kwargs): + model = ResNet(BasicBlock, [2, 2, 2, 2], [2, 2, 2, 2]) # , model_paths["resnet18"]) + return model + + +def Resnet18(**kwargs): + model = ResNet(BasicBlock, [2, 2, 2, 2], [1, 2, 2, 2]) # , model_paths["resnet18"]) + return model + + +def Resnet34_D(**kwargs): + model = ResNet(BasicBlock, [3, 4, 6, 3], [2, 2, 2, 2]) # , model_paths["resnet34"]) + return model + + +def Resnet34(**kwargs): + model = ResNet(BasicBlock, [3, 4, 6, 3], [1, 2, 2, 2]) # , model_paths["resnet34"]) + return model + + +def Resnet50_D(**kwargs): + model = ResNet(Bottleneck, [3, 4, 6, 3], [2, 2, 2, 2]) # , model_paths["resnet50"]) + return model + + +def Resnet50(**kwargs): + model = ResNet(Bottleneck, [3, 4, 6, 3], [1, 2, 2, 2]) # , model_paths["resnet50"]) + return model + + +# can use model_paths["resnet34"] to initialize the weights here, for instance +def Resnet56_D(**kwargs): + model = ResNet(BasicBlock, [4, 8, 12, 3], [2, 2, 2, 2]) + return model + + +def Resnet56(**kwargs): + model = ResNet(BasicBlock, [4, 8, 12, 3], [1, 2, 2, 2]) + return model + + +def Resnet86_D(**kwargs): + model = ResNet(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2]) + return model + + +def Resnet86(**kwargs): + model = ResNet(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2]) + return model + + +def Resnet101_D(**kwargs): + model = ResNet( + Bottleneck, [3, 4, 23, 3], [2, 2, 2, 2] + ) # , model_paths["resnet101"]) + return model + + +def 
Resnet101(**kwargs): + model = ResNet( + Bottleneck, [3, 4, 23, 3], [1, 2, 2, 2] + ) # , model_paths["resnet101"]) + return model + + +def Resnet134_D(**kwargs): + model = ResNet(BasicBlock, [8, 18, 28, 12], [2, 2, 2, 2]) + return model + + +def Resnet134(**kwargs): + model = ResNet(BasicBlock, [8, 18, 28, 12], [1, 2, 2, 2]) + return model + + +def Resnet150_D(**kwargs): + model = ResNet(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2]) + return model + + +def Resnet150(**kwargs): + model = ResNet(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2]) + return model + + +########################################################################################## +##### Slim ResNets. Narrower, with a deeper stem +########################################################################################## + + +class ResNetS(nn.Module): + def __init__( + self, + block, + layers, + strides, + pretrained_path=None, + branch_chans=[64, 128, 192, 320], + ): + super(ResNetS, self).__init__() + self.pretrained_path = pretrained_path + self.conv0 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False) + self.bn0 = nn.BatchNorm2d(32, momentum=BN_MOMENTUM) + self.relu0 = nn.ReLU(inplace=relu_inplace) + self.conv1 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU(inplace=relu_inplace) + self.inplanes = 64 + self.layer1 = self._make_layer( + block, branch_chans[0], bnum=layers[0], stride=strides[0] + ) + self.layer2 = self._make_layer( + block, branch_chans[1], bnum=layers[1], stride=strides[1] + ) + self.layer3 = self._make_layer( + block, branch_chans[2], bnum=layers[2], stride=strides[2] + ) + self.layer4 = self._make_layer( + block, branch_chans[3], bnum=layers[3], stride=strides[3] + ) + self.out_channels = [x * block.expansion for x in branch_chans] + + def _make_layer(self, block, out_chan, bnum, stride=1): + layers = [block(self.inplanes, out_chan, stride=stride)] + self.inplanes = out_chan * block.expansion + for i in range(bnum - 1): + layers.append(block(self.inplanes, out_chan, stride=1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv0(x) + x = self.relu0(self.bn0(x)) + x = self.relu1(self.bn1(self.conv1(x))) + + feat4 = self.layer1(x) + feat8 = self.layer2(feat4) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat4, feat8, feat16, feat32 + + def load_weights(self, pretrained_path=None): + if not pretrained_path: + pretrained_path = self.pretrained_path + if self.pretrained_path or pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + print(f"Loading backbone weights from {pretrained_path} with strict=False") + print(f"Caution!! 
Things could silently fail here") + self.load_state_dict(pretrained_dict, strict=False) + else: + print("No backbone weights loaded") + + +########################################################################################## +##### Slim ResNet Instantiations +##### The versions marked with _D are not trained on ImageNet, and use the weights from +##### the respective models without a _D in the name +########################################################################################## + + +def Resnet22S_D(**kwargs): + model = ResNetS(BasicBlock, [2, 3, 3, 2], [2, 2, 2, 2]) + return model + + +def Resnet22S(**kwargs): + model = ResNetS(BasicBlock, [2, 3, 3, 2], [1, 2, 2, 2]) + return model + + +def Resnet30S_D(**kwargs): + model = ResNetS(BasicBlock, [3, 4, 4, 3], [2, 2, 2, 2]) + return model + + +def Resnet30S(**kwargs): + model = ResNetS(BasicBlock, [3, 4, 4, 3], [1, 2, 2, 2]) + return model + + +def Resnet40S_D(**kwargs): + model = ResNetS(BasicBlock, [4, 5, 6, 4], [2, 2, 2, 2]) + return model + + +def Resnet40S(**kwargs): + model = ResNetS(BasicBlock, [4, 5, 6, 4], [1, 2, 2, 2]) + return model + + +def Resnet54S_D(**kwargs): + model = ResNetS(BasicBlock, [5, 8, 8, 5], [2, 2, 2, 2]) + return model + + +def Resnet54S(**kwargs): + model = ResNetS(BasicBlock, [5, 8, 8, 5], [1, 2, 2, 2]) + return model + + +def Resnet78S_D(**kwargs): + model = ResNetS(BasicBlock, [6, 12, 12, 8], [2, 2, 2, 2]) + return model + + +def Resnet78S(**kwargs): + model = ResNetS(BasicBlock, [6, 12, 12, 8], [1, 2, 2, 2]) + return model + + +def Resnet86S_D(**kwargs): + model = ResNetS(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2]) + return model + + +def Resnet86S(**kwargs): + model = ResNetS(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2]) + return model + + +def Resnet150S_D(**kwargs): + model = ResNetS(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2]) + return model + + +def Resnet150S(**kwargs): + model = ResNetS(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2]) + return model + + +########################################################################################## +##### 3 Stage ResNets +########################################################################################## + + +class ResNetNarrow(nn.Module): + def __init__( + self, + block, + layers, + strides, + pretrained_path=None, + branch_chans=[64, 96, 160, 320], + ): + super(ResNetNarrow, self).__init__() + self.pretrained_path = pretrained_path + # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn0 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu0 = nn.ReLU(inplace=relu_inplace) + self.conv1 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU(inplace=relu_inplace) + self.conv2 = nn.Conv2d( + 64, branch_chans[0], kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(branch_chans[0], momentum=BN_MOMENTUM) + self.relu2 = nn.ReLU(inplace=relu_inplace) + self.inplanes = branch_chans[0] + self.layer1 = self._make_layer( + block, branch_chans[1], bnum=layers[0], stride=strides[0] + ) + self.layer2 = self._make_layer( + block, branch_chans[2], bnum=layers[1], stride=strides[1] + ) + self.layer3 = self._make_layer( + block, branch_chans[3], bnum=layers[2], stride=strides[2] + ) + # Always load weights, and re-init from scratch if pre-trained is not specified. 
A little costly, but less messy + # self.apply(seg_model_weight_initializer) #For layers not present in the snapshot ?? + # self.load_weights(pretrained_path) + # branch_chans = [64, 96, 160, 320] + self.out_channels = [x * block.expansion for x in branch_chans] + + def _make_layer(self, block, out_chan, bnum, stride=1): + layers = [block(self.inplanes, out_chan, stride=stride)] + self.inplanes = out_chan * block.expansion + for i in range(bnum - 1): + layers.append(block(self.inplanes, out_chan, stride=1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv0(x) + x = self.relu0(self.bn0(x)) + x = self.relu1(self.bn1(self.conv1(x))) + feat4 = self.relu2(self.bn2(self.conv2(x))) + + feat8 = self.layer1(feat4) # 1/8 + feat16 = self.layer2(feat8) # 1/16 + feat32 = self.layer3(feat16) # 1/32 + return feat4, feat8, feat16, feat32 + + def load_weights(self, pretrained_path=None): + if not pretrained_path: + pretrained_path = self.pretrained_path + if self.pretrained_path or pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + print(f"Loading backbone weights from {pretrained_path} with strict=False") + print(f"Caution!! Things could silently fail here") + self.load_state_dict(pretrained_dict, strict=False) + else: + print("No backbone weights loaded") + + +########################################################################################## +##### 3 Stage ResNet Instantiations +##### These backbones do not differ between imagenet and cityscapes +########################################################################################## + + +def Resnet122N(**kwargs): + model = ResNetNarrow( + BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 96, 160, 320] + ) + return model + + +def Resnet74N(**kwargs): + model = ResNetNarrow( + BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 96, 160, 320] + ) + return model + + +def Resnet46N(**kwargs): + model = ResNetNarrow( + BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 96, 160, 320] + ) + return model + + +def Resnet122NS(**kwargs): + model = ResNetNarrow( + BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 64, 128, 256] + ) + return model + + +def Resnet74NS(**kwargs): + model = ResNetNarrow( + BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 64, 128, 256] + ) + return model + + +def Resnet46NS(**kwargs): + model = ResNetNarrow( + BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 64, 128, 256] + ) + return model diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fa22c6702660d8bee9835bd3a91cb472513e0ec2 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
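+# Weight-initialization helpers: dense_kernel_initializer (for nn.Linear) and
+# model_weight_initializer, which ffnet_blocks.create_ffnet applies to the whole
+# model before any pretrained weights are loaded.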
+ +import torch +from torch import nn +from torch.nn import init +import numpy as np + + +def dense_kernel_initializer(tensor): + _, fan_out = nn.init._calculate_fan_in_and_fan_out(tensor) + init_range = 1.0 / np.sqrt(fan_out) + + return nn.init.uniform_(tensor, a=-init_range, b=init_range) + + +def model_weight_initializer(m): + """ + Usage: + model = Model() + model.apply(weight_init) + """ + if isinstance(m, nn.Conv2d): + # Yes, this non-fancy init is on purpose, + # and seems to work better in practice for segmentation + if hasattr(m, "weight"): + nn.init.normal_(m.weight, std=0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.Linear): + dense_kernel_initializer(m.weight.data) + if m.bias is not None: + nn.init.zeros_(m.bias.data) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9da5d40966672c087cb91011395f0e6d4a46976a --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: INT8 +- Backend: QNN2.16 +- Target Device: SNM972 QCS8550 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp +mkdir build && cd build +cmake .. 
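+# cmake generates the build files; make then produces the run_test binary,
+# which links against the aidlite and OpenCV libraries (see CMakeLists.txt)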
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6406baac6c714f9aa87196ad9f7ef85783a87a64 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + const int 
src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b95ee0bb595dc5b18cd5d30a2f9a2563db8aa52 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e67a07dc0454b16d3363f2b0c92dcc87a10e3dc895fc1571b33bc2df53e3a81 +size 36449096 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..27172051a516354af0da0271da4b8f4d2dde4de5 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + 
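+        # Runtime configuration: execute the QNN context binary locally with the
+        # DSP (HTP) accelerator selected below.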
self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] 
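+# input is now NHWC float32 with shape (1, 512, 1024, 3), matching the
+# input_shapes passed to set_model_properties above; the raw (1, 64, 128, 19)
+# output is transposed to NCHW below, upsampled back to the input size, and
+# reduced to per-pixel class ids via softmax and argmax.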
+input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() + diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/README.md b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b591af7035053aa002c834fcc9ca037bfa714c4 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: INT8 +- Backend: QNN2.16 +- Target Device: SNM972 QCS8550 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp +mkdir build && cd build +cmake .. 
+make +./run_test +``` \ No newline at end of file diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..54c6d91879d41f2f63d566935d580525e53ed9b2 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < 
channels; ++c) { + const int src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..879e8d7d6b4c74e71402381d8a9e6a95613d1745 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42a1d0fa4cf46db4a4c6559188665f2cc8c1a5f1709fffbebd20df55f8c0a94 +size 18291888 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b42dab13ce7ea1acbf84d8a68c95a69de7978597 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + 
self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] 
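+# At this point `input` is a 1x512x1024x3 float32 NHWC tensor: the image was
+# converted BGR -> RGB, resized to the model's 1024x512 resolution, normalised
+# with the mean/std values above, and given a batch dimension so it matches the
+# input_shapes declared in ffnet54sQnn.__init__.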
+input_size = input.shape[1], input.shape[2]  # (H, W)
+t0 = time.time()
+out = ffnet_segm(input)
+use_time = round((time.time() - t0) * 1000, 2)
+print(f"segmentation inference time: {use_time} ms")
+out = np.transpose(out, (0, 3, 1, 2))
+out = torch.from_numpy(out)
+
+output = torch.nn.functional.interpolate(
+    out, size=input_size, mode="bilinear", align_corners=False
+)
+output_data = torch.nn.functional.softmax(output, dim=1).data
+max_probs, predictions = output_data.max(1)
+
+
+prediction = predictions.numpy().astype(np.uint8)
+test = decode_segmap(prediction[0])
+
+cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
+ffnet_segm.interpreter.destory()
+
diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/README.md b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3ab54a7d143393e53d0a38fb84fd0659c4167b10
--- /dev/null
+++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/README.md
@@ -0,0 +1,55 @@
+## Model Information
+### Source model
+- Input shape: 1x3x512x1024
+- Number of parameters: 18.04M
+- Model size: 69.4MB
+- Output shape: 1x19x64x128
+
+Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
+
+### Converted model
+
+- Precision: W8A16
+- Backend: QNN2.16
+- Target Device: SNM972 QCS8550
+
+## Inference with AidLite SDK
+
+### SDK installation
+Model Farm uses the AidLite SDK for model inference. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+- Install AidLite SDK
+
+```bash
+# Install the appropriate version of the AidLite SDK
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the QNN variant that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+
+- Verify AidLite SDK
+
+```bash
+# AidLite SDK C++ library check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+
+# AidLite SDK Python library check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+
+### Run demo
+#### python
+```bash
+cd python
+python3 demo_qnn.py
+```
+
+#### c++
+```bash
+cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp
+mkdir build && cd build
+cmake ..
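+# After the build, run_test also accepts optional flags parsed in run_test.cpp
+# (--target_model, --imgs, --invoke_nums, --model_type), for example:
+# ./run_test --imgs ../2.png --invoke_nums 10 --model_type QNN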
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98864a05827949b978ded878584179bcf01aff06 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + 
const int src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..262febd7d807c4295436ab7949dce159f7ec8248 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280ea671091e591e615d5378b33d7839622c4467db70c4ff068b4fd903901fdf +size 18418864 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..f0121a28b8a07382c6d402ef086c58e8019d35c3 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") 
+ return + + self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] +input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() +
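Both demo_qnn.py variants above use torch only for the interpolate/softmax/argmax post-processing of the 1x64x128x19 QNN output. For reference, the same steps can be reproduced with NumPy and OpenCV alone; the sketch below is illustrative (the function name and the synthetic input are assumptions, not part of the AidLite API) and mirrors the demos' 512x1024 target resolution. The resulting label map can be passed to the decode_segmap helper from the demos to colorize it.

```python
import numpy as np
import cv2


def postprocess_numpy(out_nhwc, target_hw=(512, 1024)):
    """Label map and per-pixel confidence from a 1xHxWx19 output, without torch."""
    # NHWC -> CHW for the single image in the batch (matches np.transpose(out, (0, 3, 1, 2))).
    logits = np.transpose(out_nhwc[0], (2, 0, 1)).astype(np.float32)  # 19 x 64 x 128
    num_classes = logits.shape[0]
    # Upsample each class plane to the display resolution; cv2.INTER_LINEAR uses the same
    # half-pixel convention as torch interpolate(..., align_corners=False).
    up = np.stack([
        cv2.resize(logits[c], (target_hw[1], target_hw[0]), interpolation=cv2.INTER_LINEAR)
        for c in range(num_classes)
    ])
    # Numerically stable softmax over the class axis; the argmax itself is unchanged by
    # softmax, the probabilities only mirror the demo's max_probs output.
    up -= up.max(axis=0, keepdims=True)
    probs = np.exp(up)
    probs /= probs.sum(axis=0, keepdims=True)
    pred = probs.argmax(axis=0).astype(np.uint8)  # H x W label map, values 0..18
    return pred, probs.max(axis=0)


if __name__ == "__main__":
    fake_out = np.random.rand(1, 64, 128, 19).astype(np.float32)  # stand-in for the interpreter output
    pred, conf = postprocess_numpy(fake_out)
    print(pred.shape, conf.shape)  # (512, 1024) (512, 1024)
```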