qc903113684 commited on
Commit
aaea8de
·
verified ·
1 Parent(s): b27ae22

Upload 35 files

Browse files
Files changed (36) hide show
  1. .gitattributes +10 -0
  2. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/README.md +55 -0
  3. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png +3 -0
  4. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
  5. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +366 -0
  6. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
  7. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png +3 -0
  8. model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
  9. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/README.md +55 -0
  10. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
  11. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
  12. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +366 -0
  13. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
  14. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png +3 -0
  15. model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
  16. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/README.md +55 -0
  17. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png +3 -0
  18. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt +31 -0
  19. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp +366 -0
  20. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin +3 -0
  21. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png +3 -0
  22. model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py +133 -0
  23. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/README.md +55 -0
  24. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png +3 -0
  25. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +31 -0
  26. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp +366 -0
  27. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin +3 -0
  28. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png +3 -0
  29. model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py +133 -0
  30. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/README.md +55 -0
  31. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png +3 -0
  32. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +31 -0
  33. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +366 -0
  34. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin +3 -0
  35. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png +3 -0
  36. model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py +133 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
37
+ model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
38
+ model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
39
+ model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
40
+ model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
41
+ model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
42
+ model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
43
+ model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
44
+ model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text
45
+ model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters:13.911M
5
+ - Model size:53.56MB,
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // 用于 memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // 定義顏色表 (19個類別)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax 計算 (通道維度)
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // 提取最大類別索引
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // 解碼為 RGB 圖像
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: 雙線性插值
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: 獲取預測類別
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: 解碼為 RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Step 2: 转换颜色通道 (RGB → BGR)
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047345e0f0511d5df1e64a01f2f420336de2e6863e688509dfb512aac707b2e0
3
+ size 14178304
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters:13.911M
5
+ - Model size:53.56MB,
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: W8A16
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // 用于 memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // 定義顏色表 (19個類別)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax 計算 (通道維度)
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // 提取最大類別索引
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // 解碼為 RGB 圖像
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: 雙線性插值
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: 獲取預測類別
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: 解碼為 RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Step 2: 转换颜色通道 (RGB → BGR)
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet40S_BBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d2e1e62c7c9dec18911ab610f56725ed953d35e5fa2bb3df52d9dce6f8f25a
3
+ size 14403648
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters:13.911M
5
+ - Model size:53.56MB,
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // 用于 memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // 定義顏色表 (19個類別)
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Arg-max class index per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class labels into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); only runs when channels == 3
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ result = fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50c93cf5c0f77e1db8c6ef4708fd80a3f0b2fc7be2958b2afa55d3ce3b84e4cf
3
+ size 28144512
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters: 13.911M
5
+ - Model size: 53.56MB
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: QCS8550
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the AidLite build that matches the backend above (QNN2.16 here), e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
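+ 
+ run_test also accepts a few optional flags (parsed in run_test.cpp); the flag names below come from parse_args, while the sample values are only placeholders:
+ 
+ ```bash
+ # All flags are optional; the defaults already point at ../../models and ../2.png
+ ./run_test --target_model ../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin \
+            --imgs ../2.png \
+            --invoke_nums 20 \
+            --model_type QNN
+ ```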
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Color palette for the 19 classes
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout NCHW; with n=1 this reduces to CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Arg-max class index per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class labels into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); only runs when channels == 3
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ result = fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1195497a2b5198ec425069d2c62685e1398ee58cd51ba20fec110042937d23bd
3
+ size 14129152
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=background
12
+ # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"pose detction inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/README.md ADDED
@@ -0,0 +1,55 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 1x3x512x1024
4
+ - Number of parameters: 13.911M
5
+ - Model size: 53.56MB
6
+ - Output shape: 1x19x64x128
7
+
8
+ Source model repository: [ffnet40s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: W8A16
13
+ - Backend: QNN2.16
14
+ - Target Device: QCS8550
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the AidLite build that matches the backend above (QNN2.16 here), e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### cpp
49
+ ```bash
50
+ cd ffnet40s/model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp
51
+ mkdir build && cd build
52
+ cmake ..
53
+ make
54
+ ./run_test
55
+ ```
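+ 
+ Note: python/demo_qnn.py reads `2.png` from its own directory and writes the colorized mask as `0000.jpg` next to it. A minimal way to try a different picture (the path below is a placeholder) is to drop it in under that name before running the demo:
+ 
+ ```bash
+ cd python
+ cp /path/to/your_image.png 2.png   # placeholder path; the script resizes to 1024x512 itself
+ python3 demo_qnn.py
+ ls 0000.jpg
+ ```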
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,31 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+
6
+ message(STATUS "oPENCV Library status:")
7
+ message(STATUS ">version:${OpenCV_VERSION}")
8
+ message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
9
+
10
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
11
+
12
+ include_directories(
13
+ /usr/local/include
14
+ /usr/include/opencv4
15
+ )
16
+
17
+ link_directories(
18
+ /usr/local/lib/
19
+ )
20
+
21
+ file(GLOB SRC_LISTS
22
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
23
+ )
24
+
25
+ add_executable(run_test ${SRC_LISTS})
26
+
27
+ target_link_libraries(run_test
28
+ aidlite
29
+ ${OpenCV_LIBS}
30
+ pthread
31
+ )
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,366 @@
1
+ #include <iostream>
2
+ #include <string>
3
+ #include <algorithm>
4
+ #include <cctype>
5
+ #include <cstring> // for memcpy
6
+ #include <opencv2/opencv.hpp>
7
+ #include <aidlux/aidlite/aidlite.hpp>
8
+ #include <vector>
9
+ #include <numeric>
10
+ #include <cmath>
11
+ #include <array>
12
+ #include <cstdint>
13
+
14
+ using namespace cv;
15
+ using namespace std;
16
+ using namespace Aidlux::Aidlite;
17
+
18
+ // Color palette for the 19 classes
19
+ const std::array<std::array<uint8_t, 3>, 19> label_colors = {{
20
+ {0, 0, 0}, // 0=background
21
+ {128, 0, 0}, // 1=aeroplane
22
+ {0, 128, 0}, // 2=bicycle
23
+ {128, 128, 0}, // 3=bird
24
+ {0, 0, 128}, // 4=boat
25
+ {128, 0, 128}, // 5=bottle
26
+ {0, 128, 128}, // 6=bus
27
+ {128, 128, 128}, // 7=car
28
+ {64, 0, 0}, // 8=cat
29
+ {192, 0, 0}, // 9=chair
30
+ {64, 128, 0}, // 10=cow
31
+ {192, 128, 0}, // 11=dining table
32
+ {64, 0, 128}, // 12=dog
33
+ {192, 0, 128}, // 13=horse
34
+ {64, 128, 128}, // 14=motorbike
35
+ {192, 128, 128}, // 15=person
36
+ {0, 64, 0}, // 16=potted plant
37
+ {128, 64, 0}, // 17=sheep
38
+ {0, 192, 0}, // 18=sofa
39
+ }};
40
+
41
+ // Bilinear interpolation (input layout NCHW; with n=1 this reduces to CHW)
42
+ std::vector<float> bilinear_interpolate(
43
+ const float* input, int src_h, int src_w, int target_h, int target_w, int channels) {
44
+
45
+ std::vector<float> output(target_h * target_w * channels, 0.0f);
46
+ const float scale_h = static_cast<float>(src_h) / target_h;
47
+ const float scale_w = static_cast<float>(src_w) / target_w;
48
+
49
+ for (int y = 0; y < target_h; ++y) {
50
+ const float y_src = (y + 0.5f) * scale_h - 0.5f;
51
+ const int y0 = std::max(0, std::min(static_cast<int>(y_src), src_h - 1));
52
+ const int y1 = std::max(0, std::min(y0 + 1, src_h - 1));
53
+ const float dy = y_src - y0;
54
+
55
+ for (int x = 0; x < target_w; ++x) {
56
+ const float x_src = (x + 0.5f) * scale_w - 0.5f;
57
+ const int x0 = std::max(0, std::min(static_cast<int>(x_src), src_w - 1));
58
+ const int x1 = std::max(0, std::min(x0 + 1, src_w - 1));
59
+ const float dx = x_src - x0;
60
+
61
+ for (int c = 0; c < channels; ++c) {
62
+ const int src_idx = c * src_h * src_w;
63
+ const float val00 = input[src_idx + y0 * src_w + x0];
64
+ const float val01 = input[src_idx + y0 * src_w + x1];
65
+ const float val10 = input[src_idx + y1 * src_w + x0];
66
+ const float val11 = input[src_idx + y1 * src_w + x1];
67
+
68
+ const float val = (1 - dy) * (1 - dx) * val00 +
69
+ (1 - dy) * dx * val01 +
70
+ dy * (1 - dx) * val10 +
71
+ dy * dx * val11;
72
+
73
+ output[c * target_h * target_w + y * target_w + x] = val;
74
+ }
75
+ }
76
+ }
77
+ return output;
78
+ }
79
+
80
+ // Softmax over the channel dimension
81
+ void softmax(float* data, int height, int width, int channels) {
82
+ for (int y = 0; y < height; ++y) {
83
+ for (int x = 0; x < width; ++x) {
84
+ float max_val = -INFINITY;
85
+ for (int c = 0; c < channels; ++c) {
86
+ const int idx = c * height * width + y * width + x;
87
+ max_val = std::max(max_val, data[idx]);
88
+ }
89
+
90
+ float sum_exp = 0.0f;
91
+ for (int c = 0; c < channels; ++c) {
92
+ const int idx = c * height * width + y * width + x;
93
+ sum_exp += std::exp(data[idx] - max_val);
94
+ }
95
+
96
+ for (int c = 0; c < channels; ++c) {
97
+ const int idx = c * height * width + y * width + x;
98
+ data[idx] = std::exp(data[idx] - max_val) / sum_exp;
99
+ }
100
+ }
101
+ }
102
+ }
103
+
104
+ // Arg-max class index per pixel
105
+ std::vector<uint8_t> compute_predictions(const float* data, int height, int width, int channels) {
106
+ std::vector<uint8_t> pred(height * width);
107
+ for (int y = 0; y < height; ++y) {
108
+ for (int x = 0; x < width; ++x) {
109
+ float max_prob = -INFINITY;
110
+ uint8_t max_idx = 0;
111
+ for (int c = 0; c < channels; ++c) {
112
+ const int idx = c * height * width + y * width + x;
113
+ if (data[idx] > max_prob) {
114
+ max_prob = data[idx];
115
+ max_idx = c;
116
+ }
117
+ }
118
+ pred[y * width + x] = max_idx;
119
+ }
120
+ }
121
+ return pred;
122
+ }
123
+
124
+ // Decode class labels into an RGB image
125
+ std::vector<uint8_t> decode_segmap(const std::vector<uint8_t>& pred, int height, int width) {
126
+ std::vector<uint8_t> rgb(height * width * 3);
127
+ for (int y = 0; y < height; ++y) {
128
+ for (int x = 0; x < width; ++x) {
129
+ const int idx = y * width + x;
130
+ const uint8_t label = pred[idx];
131
+ if (label < 19) {
132
+ rgb[idx * 3] = label_colors[label][0];
133
+ rgb[idx * 3 + 1] = label_colors[label][1];
134
+ rgb[idx * 3 + 2] = label_colors[label][2];
135
+ } else {
136
+ rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0;
137
+ }
138
+ }
139
+ }
140
+ return rgb;
141
+ }
142
+
143
+ struct Args {
144
+ std::string target_model = "../../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin";
145
+ std::string imgs = "../2.png";
146
+ int invoke_nums = 10;
147
+ std::string model_type = "QNN";
148
+ };
149
+
150
+ Args parse_args(int argc, char* argv[]) {
151
+ Args args;
152
+ for (int i = 1; i < argc; ++i) {
153
+ std::string arg = argv[i];
154
+ if (arg == "--target_model" && i + 1 < argc) {
155
+ args.target_model = argv[++i];
156
+ } else if (arg == "--imgs" && i + 1 < argc) {
157
+ args.imgs = argv[++i];
158
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
159
+ args.invoke_nums = std::stoi(argv[++i]);
160
+ } else if (arg == "--model_type" && i + 1 < argc) {
161
+ args.model_type = argv[++i];
162
+ }
163
+ }
164
+ return args;
165
+ }
166
+
167
+ std::string to_lower(const std::string& str) {
168
+ std::string lower_str = str;
169
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
170
+ return std::tolower(c);
171
+ });
172
+ return lower_str;
173
+ }
174
+
175
+ int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
176
+
177
+ int current_coordinate[4] = {0, 0, 0, 0};
178
+ for(int a = 0; a < src_dims[0]; ++a){
179
+ current_coordinate[0] = a;
180
+ for(int b = 0; b < src_dims[1]; ++b){
181
+ current_coordinate[1] = b;
182
+ for(int c = 0; c < src_dims[2]; ++c){
183
+ current_coordinate[2] = c;
184
+ for(int d = 0; d < src_dims[3]; ++d){
185
+ current_coordinate[3] = d;
186
+
187
+ int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
188
+ current_coordinate[1]*src_dims[2]*src_dims[3] +
189
+ current_coordinate[2]*src_dims[3] +
190
+ current_coordinate[3];
191
+
192
+ int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
193
+ current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
194
+ current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
195
+ current_coordinate[tsp_dims[3]];
196
+
197
+ dest[new_index] = src[old_index];
198
+ }
199
+ }
200
+ }
201
+ }
202
+
203
+ return EXIT_SUCCESS;
204
+ }
205
+
206
+ cv::Mat post_process(cv::Mat &frame, float* outdata)
207
+ {
208
+ cv::Mat input_image = frame.clone();
209
+ // Initialize vectors to hold respective outputs while unwrapping detections.
210
+ std::vector<int> class_ids;
211
+ std::vector<float> confidences;
212
+ std::vector<cv::Rect> boxes;
213
+ std::vector<cv::Mat> masks;
214
+ std::vector<float> class_scores;
215
+ cv::RNG rng;
216
+ cv::Mat masked_img;
217
+
218
+ unsigned int src_dims[4] = {1, 64,128,19};
219
+ unsigned int tsp_dims[4] = {0,3,1,2};
220
+ unsigned int stride_data_num = 1*64*128*19;
221
+ float* format_data = new float[stride_data_num];
222
+ transpose(outdata, src_dims, tsp_dims, format_data);
223
+ cv::Mat proto_buffer(19,64*128, CV_32F, format_data);
224
+ std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl;
225
+
226
+ const int channels = 19;
227
+ int target_h = 64, target_w = 128;
228
+ int src_h = 64, src_w = 128;
229
+ // Step 1: bilinear interpolation
230
+ auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels);
231
+
232
+ // Step 2: Softmax
233
+ softmax(interpolated.data(), target_h, target_w, channels);
234
+
235
+ // Step 3: per-pixel predicted class
236
+ auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels);
237
+
238
+ printf("Processing finished.\n");
239
+ // Step 4: decode to RGB
240
+ std::vector<uint8_t> rgb_data = decode_segmap(predictions, target_h, target_w);
241
+ cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast<uint8_t*>(rgb_data.data()));
242
+
243
+ // Convert color channels (RGB → BGR); only runs when channels == 3
244
+ if (channels == 3) {
245
+ cv::cvtColor(image, image, cv::COLOR_RGB2BGR);
246
+ }
247
+ cv::Mat resized_cubic;
248
+ cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC);
249
+ return resized_cubic;
250
+ }
251
+
252
+
253
+
254
+ int invoke(const Args& args) {
255
+ std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
256
+ << "Image Path: " << args.imgs << "\n"
257
+ << "Inference Nums: " << args.invoke_nums << "\n"
258
+ << "Model Type: " << args.model_type << "\n";
259
+ Model* model = Model::create_instance(args.target_model);
260
+ if(model == nullptr){
261
+ printf("Create model failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+ Config* config = Config::create_instance();
265
+ if(config == nullptr){
266
+ printf("Create config failed !\n");
267
+ return EXIT_FAILURE;
268
+ }
269
+ config->implement_type = ImplementType::TYPE_LOCAL;
270
+ std::string model_type_lower = to_lower(args.model_type);
271
+ if (model_type_lower == "qnn"){
272
+ config->framework_type = FrameworkType::TYPE_QNN216;
273
+ } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
274
+ config->framework_type = FrameworkType::TYPE_SNPE2;
275
+ }
276
+ config->accelerate_type = AccelerateType::TYPE_DSP;
277
+ config->is_quantify_model = 1;
278
+
279
+ unsigned int model_h = 512;
280
+ unsigned int model_w = 1024;
281
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
282
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,64,128,19}};
283
+ model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
284
+ std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
285
+ if(fast_interpreter == nullptr){
286
+ printf("build_interpretper_from_model_and_config failed !\n");
287
+ return EXIT_FAILURE;
288
+ }
289
+ int result = fast_interpreter->init();
290
+ if(result != EXIT_SUCCESS){
291
+ printf("interpreter->init() failed !\n");
292
+ return EXIT_FAILURE;
293
+ }
294
+ // load model
295
+ result = fast_interpreter->load_model();
296
+ if(result != EXIT_SUCCESS){
297
+ printf("interpreter->load_model() failed !\n");
298
+ return EXIT_FAILURE;
299
+ }
300
+ printf("detect model load success!\n");
301
+
302
+ cv::Mat frame = cv::imread(args.imgs);
303
+ if (frame.empty()) {
304
+ printf("detect image load failed!\n");
305
+ return 1;
306
+ }
307
+ printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
308
+ cv::Mat input_data;
309
+ cv::Mat frame_clone = frame.clone();
310
+ cv::Scalar stds_scale(58.395, 57.12, 57.375);
311
+ cv::Scalar means_scale(123.675, 116.28, 103.53);
312
+ cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
313
+ frame_clone.convertTo(input_data, CV_32F);
314
+ cv::subtract(input_data, means_scale, input_data);
315
+ cv::divide(input_data, stds_scale, input_data);
316
+
317
+ float *outdata0 = nullptr;
318
+ std::vector<float> invoke_time;
319
+ for (int i = 0; i < args.invoke_nums; ++i) {
320
+ result = fast_interpreter->set_input_tensor(0, input_data.data);
321
+ if(result != EXIT_SUCCESS){
322
+ printf("interpreter->set_input_tensor() failed !\n");
323
+ return EXIT_FAILURE;
324
+ }
325
+ auto t1 = std::chrono::high_resolution_clock::now();
326
+ result = fast_interpreter->invoke();
327
+ auto t2 = std::chrono::high_resolution_clock::now();
328
+ std::chrono::duration<double> cost_time = t2 - t1;
329
+ invoke_time.push_back(cost_time.count() * 1000);
330
+ if(result != EXIT_SUCCESS){
331
+ printf("interpreter->invoke() failed !\n");
332
+ return EXIT_FAILURE;
333
+ }
334
+ uint32_t out_data_0 = 0;
335
+ result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
336
+ if(result != EXIT_SUCCESS){
337
+ printf("interpreter->get_output_tensor() 1 failed !\n");
338
+ return EXIT_FAILURE;
339
+ }
340
+
341
+ }
342
+
343
+ float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
344
+ float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
345
+ float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
346
+ float var_invoketime = 0.0f;
347
+ for (auto time : invoke_time) {
348
+ var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
349
+ }
350
+ var_invoketime /= args.invoke_nums;
351
+ printf("=======================================\n");
352
+ printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
353
+ args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
354
+ printf("=======================================\n");
355
+
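+ // Turn the last run's raw output into a segmentation image via post_process() (defined
+ // earlier in this file) and save it as results.png.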
356
+ cv::Mat img = post_process(frame, outdata0);
357
+ cv::imwrite("./results.png", img);
358
+ fast_interpreter->destory();
359
+ return 0;
360
+ }
361
+
362
+
363
+ int main(int argc, char* argv[]) {
364
+ Args args = parse_args(argc, argv);
365
+ return invoke(args);
366
+ }
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c14ec4ae6d19b5a0670d5c02616e40b1012e1325065b76f6db532c6df53dba
3
+ size 14235648
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png ADDED

Git LFS Details

  • SHA256: 202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101
  • Pointer size: 132 Bytes
  • Size of remote file: 2.28 MB
model_farm_ffnet40s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ import time
6
+ import aidlite
7
+ import os
8
+
9
+
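+ # Map each per-pixel class id predicted by the model to an RGB color for visualization.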
10
+ def decode_segmap(image, nc=19):
11
+ label_colors = np.array([(0, 0, 0), # 0=road
12
+ # 1=sidewalk, 2=building, 3=wall, 4=fence, 5=pole
13
+ (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
14
+ # 6=traffic light, 7=traffic sign, 8=vegetation, 9=terrain, 10=sky
15
+ (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
16
+ # 11=person, 12=rider, 13=car, 14=truck, 15=bus
17
+ (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
18
+ # 16=train, 17=motorcycle, 18=bicycle (last two colors are unused)
19
+ (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
20
+ r = np.zeros_like(image).astype(np.uint8)
21
+ g = np.zeros_like(image).astype(np.uint8)
22
+ b = np.zeros_like(image).astype(np.uint8)
23
+ for l in range(0, nc):
24
+ idx = image == l
25
+ r[idx] = label_colors[l, 0]
26
+ g[idx] = label_colors[l, 1]
27
+ b[idx] = label_colors[l, 2]
28
+ rgb = np.stack([r, g, b], axis=2)
29
+ return rgb
30
+
31
+
32
+
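+ # Thin wrapper around the aidlite QNN interpreter: it loads the compiled context binary,
+ # enables DSP acceleration, and exposes inference as a simple callable.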
33
+ class ffnet40sQnn:
34
+ def __init__(self):
35
+ super().__init__()
36
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet40S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"))
37
+ if self.model is None:
38
+ print("Create model failed !")
39
+ return
40
+
41
+ self.config = aidlite.Config.create_instance()
42
+ if self.config is None:
43
+ print("build_interpretper_from_model_and_config failed !")
44
+ return
45
+
46
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
47
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
48
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
49
+ # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU
50
+ self.config.is_quantify_model = 1
51
+
52
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
53
+ if self.interpreter is None:
54
+ print("build_interpretper_from_model_and_config failed !")
55
+ return
56
+ input_shapes = [[1,512,1024,3]]
57
+ output_shapes = [[1,64,128,19]]
58
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
59
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
60
+
61
+ if self.interpreter is None:
62
+ print("build_interpretper_from_model_and_config failed !")
63
+ result = self.interpreter.init()
64
+ if result != 0:
65
+ print(f"interpreter init failed !")
66
+ result = self.interpreter.load_model()
67
+ if result != 0:
68
+ print("interpreter load model failed !")
69
+
70
+ print(" model load success!")
71
+
72
+ def __call__(self, input):
73
+ self.interpreter.set_input_tensor(0,input)
74
+ invoke_time=[]
75
+ invoke_nums =10
76
+ for i in range(invoke_nums):
77
+ result = self.interpreter.set_input_tensor(0, input.data)
78
+ if result != 0:
79
+ print("interpreter set_input_tensor() failed")
80
+ t1=time.time()
81
+ result = self.interpreter.invoke()
82
+ cost_time = (time.time()-t1)*1000
83
+ invoke_time.append(cost_time)
84
+
85
+ max_invoke_time = max(invoke_time)
86
+ min_invoke_time = min(invoke_time)
87
+ mean_invoke_time = sum(invoke_time)/invoke_nums
88
+ var_invoketime=np.var(invoke_time)
89
+ print("====================================")
90
+ print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
91
+ print("====================================")
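+ # Reshape the returned output buffer to NHWC (1, 64, 128, 19) class scores.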
92
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy()
93
+ return features_0
94
+
95
+
96
+
97
+
98
+
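+ # Demo: load one image, run the segmentation model, and save the colorized prediction.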
99
+ ffnet_segm = ffnet40sQnn()
100
+
101
+ frame_ct=0
102
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png")
103
+
104
+ image = cv2.imread(image_path)
105
+ image=cv2.resize(image,(1024,512))
106
+ frame = np.ascontiguousarray(image[:,:,::-1])
107
+
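+ # Normalize with ImageNet mean/std in RGB order (the BGR frame was flipped to RGB above).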
108
+ mean_data=[123.675, 116.28, 103.53]
109
+ std_data=[58.395, 57.12, 57.375]
110
+ img_input = (frame-mean_data)/std_data # HWC
111
+ input = img_input.astype(np.float32)
112
+ input = input[np.newaxis, ...]
113
+ input_size = input.shape[1], input.shape[2] #H w
114
+ t0 = time.time()
115
+ out = ffnet_segm(input)
116
+ use_time = round((time.time() - t0) * 1000, 2)
117
+ print(f"segmentation inference_time:{use_time} ms")
118
+ out = np.transpose(out, (0, 3, 1,2))
119
+ out = torch.from_numpy(out)
120
+
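+ # Upsample the 64x128 class scores back to the 512x1024 input size, apply softmax over the
+ # class dimension, and take the per-pixel argmax as the predicted label map.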
121
+ output = torch.nn.functional.interpolate(
122
+ out, size=input_size, mode="bilinear", align_corners=False
123
+ )
124
+ output_data = torch.nn.functional.softmax(output, dim=1).data
125
+ max_probs, predictions = output_data.max(1)
126
+
127
+
128
+ prediction = predictions.numpy().astype(np.uint8)
129
+ test = decode_segmap( prediction[0])
130
+
131
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1])
132
+ ffnet_segm.interpreter.destory()
133
+