qc903113684 commited on 7 days ago

Commit

af0dd4f

verified ·

1 Parent(s): 274c5c0

Upload 47 files

Browse files

Files changed (47) hide show

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/README.md +57 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp/CMakeLists.txt +32 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp/baboon.png +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp/run_test.cpp +243 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/models/RRDB_ESRGAN_x4.pth +3 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/models/m_RRDB_esrgan_x4.pt +3 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin +3 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/LR/baboon.png +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/demo_qnn.py +84 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/README.md +47 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +32 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/cpp/baboon.png +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +243 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin +3 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/LR/baboon.png +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc +0 -0
model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +115 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/README.md +57 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt +32 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp/baboon.png +0 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp +243 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/models/m_RRDB_esrgan_x4_fp16.qnn216.ctx.bin +3 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/LR/baboon.png +0 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc +0 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc +0 -0
model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py +115 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/README.md +57 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +32 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp/baboon.png +0 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp +243 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin +3 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/LR/baboon.png +0 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc +0 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc +0 -0
model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py +115 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/README.md +57 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt +32 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp/baboon.png +0 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp +243 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin +3 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/LR/baboon.png +0 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc +0 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc +0 -0
model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py +115 -0

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+## Model Information
+### Source model
+- Input shape: 128x128
+- Number of parameters: 16.69M
+- Model size: 63.8MB
+- Output shape: 1x3x512x512
+- Source model repository: [ESRGAN](https://github.com/xinntao/ESRGAN/)
+### Converted Model
+- Precision: W8A16
+- Backend: QNN2.16
+- Target Device: FV01 QCS6490
+## Inference with AidLite SDK
+### SDK installation
+Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+- install AidLite SDK
+```bash
+# Install the appropriate version of the aidlite sdk
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the QNN package that matches the backend listed above. E.g., to install QNN 2.23 for AidLite: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+- Verify AidLite SDK
+```bash
+# aidlite sdk c++ check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+# aidlite sdk python check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+### Run demo
+#### python
+```bash
+cd python
+python3 demo_qnn.py
+```
+#### c++
+```bash
+cd esrgan/model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp
+mkdir build && cd build
+cmake ..
+make
+./run_test
+```

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# Build script for the ESRGAN AidLite C++ demo executable (run_test).
+# The version range keeps 3.5 as the minimum while opting in to newer policies.
+cmake_minimum_required(VERSION 3.5...3.27)
+project(run_test)
+find_package(OpenCV REQUIRED)
+find_package(Threads REQUIRED)
+message(STATUS "oPENCV Library status:")
+message(STATUS ">version:${OpenCV_VERSION}")
+message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
+# aidlite and jsoncpp are linked by bare name, so the linker needs
+# /usr/local/lib on its search path. link_directories() only affects targets
+# created after it, hence it precedes add_executable().
+link_directories(/usr/local/lib/)
+add_executable(run_test "${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp")
+# Target-scoped options: unlike set(CMAKE_CXX_FLAGS ...), this does not discard
+# flags supplied by the user or a toolchain file.
+target_compile_options(run_test PRIVATE
+    -Wno-error=deprecated-declarations
+    -Wno-deprecated-declarations
+)
+# Use the include path reported by find_package(OpenCV) rather than a
+# hard-coded /usr/include/opencv4.
+target_include_directories(run_test PRIVATE
+    /usr/local/include
+    ${OpenCV_INCLUDE_DIRS}
+)
+target_link_libraries(run_test PRIVATE
+    aidlite
+    ${OpenCV_LIBS}
+    Threads::Threads
+    jsoncpp
+)

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp/baboon.png ADDED Viewed

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/cpp/run_test.cpp ADDED Viewed

	@@ -0,0 +1,243 @@

+#include <iostream>
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <aidlux/aidlite/aidlite.hpp>
+#include <vector>
+#include <numeric>
+#include <cmath>
+#include <jsoncpp/json/json.h>
+using namespace cv;
+using namespace std;
+using namespace Aidlux::Aidlite;
+// Command-line options of the demo. Every field has a default that
+// parse_args() overrides when the matching "--<field> <value>" pair is given.
+struct Args {
+    // Path of the model file handed to Model::create_instance().
+    std::string target_model = "../../models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin";
+    // Path of the input image read with cv::imread().
+    std::string imgs = "../baboon.png";
+    // Number of timed inference iterations.
+    int invoke_nums = 10;
+    // Backend selector; invoke() compares it case-insensitively.
+    std::string model_type = "QNN";
+};
+// Builds an Args from the command line.
+// Recognized options (each followed by one value): --target_model, --imgs,
+// --invoke_nums, --model_type. Unknown arguments, and a recognized option
+// that is the last argument (no value follows), are silently ignored.
+// NOTE(review): std::stoi throws std::invalid_argument / std::out_of_range on
+// a non-numeric or oversized --invoke_nums value; nothing here catches it.
+Args parse_args(int argc, char* argv[]) {
+    Args args;
+    // argv[0] is the program name, so scanning starts at 1.
+    for (int i = 1; i < argc; ++i) {
+        std::string arg = argv[i];
+        // "i + 1 < argc" guarantees a value exists before ++i consumes it.
+        if (arg == "--target_model" && i + 1 < argc) {
+            args.target_model = argv[++i];
+        } else if (arg == "--imgs" && i + 1 < argc) {
+            args.imgs = argv[++i];
+        } else if (arg == "--invoke_nums" && i + 1 < argc) {
+            args.invoke_nums = std::stoi(argv[++i]);
+        } else if (arg == "--model_type" && i + 1 < argc) {
+            args.model_type = argv[++i];
+        }
+    }
+    return args;
+}
+// Returns a copy of str with every character passed through std::tolower.
+// The input string is left untouched.
+std::string to_lower(const std::string& str) {
+    std::string lower_str = str;
+    // The lambda takes unsigned char so std::tolower never receives a
+    // negative value.
+    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
+        return std::tolower(c);
+    });
+    return lower_str;
+}
+// Permutes the axes of a dense, row-major 4-D float array.
+//   src      - input buffer, indexed with extents src_dims[0..3]
+//   src_dims - the four extents of src
+//   tsp_dims - axis permutation: output axis k is source axis tsp_dims[k]
+//   dest     - output buffer; every source element is written exactly once,
+//              so it must hold as many elements as src
+// Always returns EXIT_SUCCESS.
+int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
+    // Coordinate of the element currently being copied, in source axis order.
+    int current_coordinate[4] = {0, 0, 0, 0};
+    for(int a = 0; a < src_dims[0]; ++a){
+        current_coordinate[0] = a;
+        for(int b = 0; b < src_dims[1]; ++b){
+            current_coordinate[1] = b;
+            for(int c = 0; c < src_dims[2]; ++c){
+                current_coordinate[2] = c;
+                for(int d = 0; d < src_dims[3]; ++d){
+                    current_coordinate[3] = d;
+                    // Row-major linear offset in the source layout.
+                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
+                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
+                                    current_coordinate[2]*src_dims[3] +
+                                    current_coordinate[3];
+                    // Same formula with coordinates and extents looked up
+                    // through the permutation, i.e. the offset in the
+                    // transposed layout.
+                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
+                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
+                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
+                                    current_coordinate[tsp_dims[3]];
+                    dest[new_index] = src[old_index];
+                }
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+// Converts a model output buffer to an 8-bit image and writes it to
+// "./result_img.jpg".
+// output is read as 512*512*3 floats in H,W,C order; the pointer is not
+// checked for null. The return value of cv::imwrite is ignored.
+// NOTE(review): the three input channels are treated as R,G,B and reversed to
+// B,G,R before saving - confirm against the model's actual output layout.
+void save_output_image_from_nhwc(float* output) {
+    unsigned int H = 512;
+    unsigned int W = 512;
+    unsigned int C = 3;
+    // Step 1: clamp every value to [0,1]
+    std::vector<float> clipped(H * W * C);
+    for (int i = 0; i < H * W * C; ++i) {
+        clipped[i] = std::min(1.0f, std::max(0.0f, output[i]));
+    }
+    // Step 2: (H,W,C) to (C,H,W); the trailing extent of 1 pads the shape to
+    // the four axes transpose() expects
+    unsigned int src_dims1[4] = {H, W, C, 1};
+    unsigned int tsp_dims1[4] = {2, 0, 1, 3};
+    std::vector<float> chw(C * H * W);
+    transpose(clipped.data(), src_dims1, tsp_dims1, chw.data());
+    // Step 3: reverse the channel order (plane c is copied to plane 2 - c)
+    std::vector<float> chw_bgr(C * H * W);
+    for (int h = 0; h < H; ++h) {
+        for (int w = 0; w < W; ++w) {
+            for (int c = 0; c < C; ++c) {
+                int src_index = c * H * W + h * W + w;
+                int dst_c = 2 - c;
+                int dst_index = dst_c * H * W + h * W + w;
+                chw_bgr[dst_index] = chw[src_index];
+            }
+        }
+    }
+    // Step 4: (C,H,W) back to (H,W,C)
+    unsigned int src_dims2[4] = {C, H, W, 1};
+    unsigned int tsp_dims2[4] = {1, 2, 0, 3};
+    std::vector<float> hwc(H * W * C);
+    transpose(chw_bgr.data(), src_dims2, tsp_dims2, hwc.data());
+    // Step 5: scale [0,1] to [0,255], round, and store as a CV_8UC3 image
+    cv::Mat result(H, W, CV_8UC3);
+    for (int y = 0; y < H; ++y) {
+        for (int x = 0; x < W; ++x) {
+            int idx = (y * W + x) * C;
+            uchar b = static_cast<uchar>(std::round(hwc[idx + 0] * 255.0f));
+            uchar g = static_cast<uchar>(std::round(hwc[idx + 1] * 255.0f));
+            uchar r = static_cast<uchar>(std::round(hwc[idx + 2] * 255.0f));
+            result.at<cv::Vec3b>(y, x) = cv::Vec3b(b, g, r);
+        }
+    }
+    // Save the image (path is relative to the current working directory)
+    cv::imwrite("./result_img.jpg", result);
+}
+// Runs the whole demo: creates an AidLite interpreter for args.target_model,
+// feeds it args.imgs resized to 128x128, times args.invoke_nums inferences,
+// prints timing statistics and writes the result via
+// save_output_image_from_nhwc().
+// Returns 0 on success and a non-zero value on any failure.
+int invoke(const Args& args) {
+    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
+              << "Image Path: " << args.imgs << "\n"
+              << "Inference Nums: " << args.invoke_nums << "\n"
+              << "Model Type: " << args.model_type << "\n";
+    // At least one run is required: the statistics below dereference
+    // max_element/min_element and the post-processing reads the last output.
+    if (args.invoke_nums <= 0) {
+        printf("invoke_nums must be greater than 0 !\n");
+        return EXIT_FAILURE;
+    }
+    Model* model = Model::create_instance(args.target_model);
+    if(model == nullptr){
+        printf("Create model failed !\n");
+        return EXIT_FAILURE;
+    }
+    Config* config = Config::create_instance();
+    if(config == nullptr){
+        printf("Create config failed !\n");
+        return EXIT_FAILURE;
+    }
+    config->implement_type = ImplementType::TYPE_LOCAL;
+    std::string model_type_lower = to_lower(args.model_type);
+    if (model_type_lower == "qnn"){
+        config->framework_type = FrameworkType::TYPE_QNN;
+    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
+        config->framework_type = FrameworkType::TYPE_SNPE2;
+    } else {
+        // Do not continue with an unset framework type.
+        printf("Unsupported model type: %s\n", args.model_type.c_str());
+        return EXIT_FAILURE;
+    }
+    config->accelerate_type = AccelerateType::TYPE_DSP;
+    config->is_quantify_model = 1;
+    unsigned int model_h = 128;
+    unsigned int model_w = 128;
+    std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
+    std::vector<std::vector<uint32_t>> output_shapes = {{1,3,model_h*4,model_w*4}};
+    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
+    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
+    if(fast_interpreter == nullptr){
+        printf("build_interpretper_from_model_and_config failed !\n");
+        return EXIT_FAILURE;
+    }
+    int result = fast_interpreter->init();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->init() failed !\n");
+        return EXIT_FAILURE;
+    }
+    // load model; the return value must be stored, otherwise the check below
+    // only re-tests the result of init()
+    result = fast_interpreter->load_model();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->load_model() failed !\n");
+        return EXIT_FAILURE;
+    }
+    printf("detect model load success!\n");
+    cv::Mat frame = cv::imread(args.imgs);
+    if (frame.empty()) {
+        printf("detect image load failed!\n");
+        return 1;
+    }
+    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
+    // Pre-process: BGR -> RGB, resize to the model input size, scale to [0,1]
+    // as 32-bit floats.
+    cv::Mat input_data;
+    cv::Mat frame_clone = frame.clone();
+    cv::cvtColor(frame_clone, frame_clone, cv::COLOR_BGR2RGB);
+    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
+    frame_clone.convertTo(input_data, CV_32FC3, 1.0 / 255.0);
+    float *outdata0 = nullptr;
+    // Per-iteration invoke() latency in milliseconds.
+    std::vector<float> invoke_time;
+    for (int i = 0; i < args.invoke_nums; ++i) {
+        result = fast_interpreter->set_input_tensor(0, input_data.data);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->set_input_tensor() failed !\n");
+            return EXIT_FAILURE;
+        }
+        // Only invoke() itself is timed.
+        auto t1 = std::chrono::high_resolution_clock::now();
+        result = fast_interpreter->invoke();
+        auto t2 = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> cost_time = t2 - t1;
+        invoke_time.push_back(cost_time.count() * 1000);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->invoke() failed !\n");
+            return EXIT_FAILURE;
+        }
+        uint32_t out_data_0 = 0;
+        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->get_output_tensor() 1 failed !\n");
+            return EXIT_FAILURE;
+        }
+    }
+    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
+    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
+    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
+    // Population variance of the latencies.
+    float var_invoketime = 0.0f;
+    for (auto time : invoke_time) {
+        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
+    }
+    var_invoketime /= args.invoke_nums;
+    printf("=======================================\n");
+    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
+        args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
+    printf("=======================================\n");
+    //  post process (uses the output of the last iteration)
+    if (outdata0 == nullptr) {
+        printf("interpreter->get_output_tensor() returned no data !\n");
+        return EXIT_FAILURE;
+    }
+    save_output_image_from_nhwc(outdata0);
+    fast_interpreter->destory();
+    return 0;
+}
+// Entry point: parses the command line and uses the result of invoke() as the
+// process exit code.
+int main(int argc, char* argv[]) {
+    Args args = parse_args(argc, argv);
+    return invoke(args);
+}

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/models/RRDB_ESRGAN_x4.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65fece06e1ccb48853242aa972bdf00ad07a7dd8938d2dcbdf4221b59f6372ce
+size 66929193

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/models/m_RRDB_esrgan_x4.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:018ab32fd56641b382fa572180f0679cebbaef868885d2a1e626ee5a4453f542
+size 67935783

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e254667574187295e925096f5c6386ef638cf5e79207329f83edbe44f485bdcd
+size 25100616

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/LR/baboon.png ADDED Viewed

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc ADDED Viewed

Binary file (3.2 kB). View file

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc ADDED Viewed

Binary file (3.22 kB). View file

model_farm_esrgan_qcs6490_qnn2.16_int16_aidlite/python/demo_qnn.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import os.path as osp
+import glob
+import cv2
+import numpy as np
+# import torch
+import time
+import aidlite
+import os
+class esrganQnn:
+    def __init__(self):
+        super().__init__()
+        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin"))
+        if self.model is None:
+            print("Create model failed !")
+            return
+        self.config = aidlite.Config.create_instance()
+        if self.config is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
+        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+        self.config.is_quantify_model = 1
+        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        input_shapes = [[1, 128, 128,3]]
+        # input_shapes = [[1,3, 128, 128]]
+        output_shapes = [[1, 3,128*4,128*4]]
+        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
+                                output_shapes, aidlite.DataType.TYPE_FLOAT32)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+        result = self.interpreter.init()
+        if result != 0:
+            print(f"interpreter init failed !")
+        result = self.interpreter.load_model()
+        if result != 0:
+            print("interpreter load model failed !")
+        print(" model load success!")
+    def __call__(self, input):
+        self.interpreter.set_input_tensor(0,input)
+        self.interpreter.invoke()
+        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 128*4,128*4,3).copy()
+        return features_0
+esrgan_model= esrganQnn()
+test_img_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'LR/*')
+idx = 0
+for path in glob.glob(test_img_folder):
+    idx += 1
+    base = osp.splitext(osp.basename(path))[0]
+    print(idx, base)
+    # read images
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+    img = cv2.resize(img, (128,128))
+    img = img * 1.0 / 255
+    img = img[:, :, [2, 1, 0]]
+    img_LR = np.expand_dims(img,axis=0).astype(np.float32)
+    t0 = time.time()
+    output = esrgan_model(img_LR)  #.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = np.clip(output[0], 0, 1)
+    output = np.transpose(output, (2, 0, 1))
+    use_time = round((time.time() - t0) * 1000, 2)
+    print(f"Inference_time:{use_time} ms")
+    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
+    output = (output * 255.0).round()
+    cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'results/{:s}_rlt_16qnn.png'.format(base)), output)
+print("ok")
+esrgan_model.interpreter.destory()

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/README.md ADDED Viewed

	@@ -0,0 +1,47 @@

+## Model Information
+### Source model
+- Input shape: 128x128
+- Number of parameters: 16.69M
+- Model size: 63.8MB
+- Output shape: 1x3x512x512
+- Source model repository: [ESRGAN](https://github.com/xinntao/ESRGAN/)
+### Converted Model
+- Precision: INT8
+- Backend: QNN2.16
+- Target Device: FV01 QCS6490
+## Inference with AidLite SDK
+### SDK installation
+Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+- install AidLite SDK
+```bash
+# Install the appropriate version of the aidlite sdk
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the QNN package that matches the backend listed above. E.g., to install QNN 2.23 for AidLite: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+- Verify AidLite SDK
+```bash
+# aidlite sdk c++ check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+# aidlite sdk python check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+### Run demo
+```bash
+cd python
+python3 demo_qnn.py
+```

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# Build script for the ESRGAN AidLite C++ demo executable (run_test).
+# The version range keeps 3.5 as the minimum while opting in to newer policies.
+cmake_minimum_required(VERSION 3.5...3.27)
+project(run_test)
+find_package(OpenCV REQUIRED)
+find_package(Threads REQUIRED)
+message(STATUS "oPENCV Library status:")
+message(STATUS ">version:${OpenCV_VERSION}")
+message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
+# aidlite and jsoncpp are linked by bare name, so the linker needs
+# /usr/local/lib on its search path. link_directories() only affects targets
+# created after it, hence it precedes add_executable().
+link_directories(/usr/local/lib/)
+add_executable(run_test "${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp")
+# Target-scoped options: unlike set(CMAKE_CXX_FLAGS ...), this does not discard
+# flags supplied by the user or a toolchain file.
+target_compile_options(run_test PRIVATE
+    -Wno-error=deprecated-declarations
+    -Wno-deprecated-declarations
+)
+# Use the include path reported by find_package(OpenCV) rather than a
+# hard-coded /usr/include/opencv4.
+target_include_directories(run_test PRIVATE
+    /usr/local/include
+    ${OpenCV_INCLUDE_DIRS}
+)
+target_link_libraries(run_test PRIVATE
+    aidlite
+    ${OpenCV_LIBS}
+    Threads::Threads
+    jsoncpp
+)

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/cpp/baboon.png ADDED Viewed

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED Viewed

	@@ -0,0 +1,243 @@

+#include <iostream>
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <aidlux/aidlite/aidlite.hpp>
+#include <vector>
+#include <numeric>
+#include <cmath>
+#include <jsoncpp/json/json.h>
+using namespace cv;
+using namespace std;
+using namespace Aidlux::Aidlite;
+// Command-line options of the demo. Every field has a default that
+// parse_args() overrides when the matching "--<field> <value>" pair is given.
+struct Args {
+    // Path of the model file handed to Model::create_instance().
+    std::string target_model = "../../models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin";
+    // Path of the input image read with cv::imread().
+    std::string imgs = "../baboon.png";
+    // Number of timed inference iterations.
+    int invoke_nums = 10;
+    // Backend selector; invoke() compares it case-insensitively.
+    std::string model_type = "QNN";
+};
+// Builds an Args from the command line.
+// Recognized options (each followed by one value): --target_model, --imgs,
+// --invoke_nums, --model_type. Unknown arguments, and a recognized option
+// that is the last argument (no value follows), are silently ignored.
+// NOTE(review): std::stoi throws std::invalid_argument / std::out_of_range on
+// a non-numeric or oversized --invoke_nums value; nothing here catches it.
+Args parse_args(int argc, char* argv[]) {
+    Args args;
+    // argv[0] is the program name, so scanning starts at 1.
+    for (int i = 1; i < argc; ++i) {
+        std::string arg = argv[i];
+        // "i + 1 < argc" guarantees a value exists before ++i consumes it.
+        if (arg == "--target_model" && i + 1 < argc) {
+            args.target_model = argv[++i];
+        } else if (arg == "--imgs" && i + 1 < argc) {
+            args.imgs = argv[++i];
+        } else if (arg == "--invoke_nums" && i + 1 < argc) {
+            args.invoke_nums = std::stoi(argv[++i]);
+        } else if (arg == "--model_type" && i + 1 < argc) {
+            args.model_type = argv[++i];
+        }
+    }
+    return args;
+}
+// Returns a copy of str with every character passed through std::tolower.
+// The input string is left untouched.
+std::string to_lower(const std::string& str) {
+    std::string lower_str = str;
+    // The lambda takes unsigned char so std::tolower never receives a
+    // negative value.
+    std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
+        return std::tolower(c);
+    });
+    return lower_str;
+}
+// Permutes the axes of a dense, row-major 4-D float array.
+//   src      - input buffer, indexed with extents src_dims[0..3]
+//   src_dims - the four extents of src
+//   tsp_dims - axis permutation: output axis k is source axis tsp_dims[k]
+//   dest     - output buffer; every source element is written exactly once,
+//              so it must hold as many elements as src
+// Always returns EXIT_SUCCESS.
+int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
+    // Coordinate of the element currently being copied, in source axis order.
+    int current_coordinate[4] = {0, 0, 0, 0};
+    for(int a = 0; a < src_dims[0]; ++a){
+        current_coordinate[0] = a;
+        for(int b = 0; b < src_dims[1]; ++b){
+            current_coordinate[1] = b;
+            for(int c = 0; c < src_dims[2]; ++c){
+                current_coordinate[2] = c;
+                for(int d = 0; d < src_dims[3]; ++d){
+                    current_coordinate[3] = d;
+                    // Row-major linear offset in the source layout.
+                    int old_index = current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] +
+                                    current_coordinate[1]*src_dims[2]*src_dims[3] +
+                                    current_coordinate[2]*src_dims[3] +
+                                    current_coordinate[3];
+                    // Same formula with coordinates and extents looked up
+                    // through the permutation, i.e. the offset in the
+                    // transposed layout.
+                    int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
+                                    current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] +
+                                    current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] +
+                                    current_coordinate[tsp_dims[3]];
+                    dest[new_index] = src[old_index];
+                }
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+// Converts a model output buffer to an 8-bit image and writes it to
+// "./result_img.jpg".
+// output is read as 512*512*3 floats in H,W,C order; the pointer is not
+// checked for null. The return value of cv::imwrite is ignored.
+// NOTE(review): the three input channels are treated as R,G,B and reversed to
+// B,G,R before saving - confirm against the model's actual output layout.
+void save_output_image_from_nhwc(float* output) {
+    unsigned int H = 512;
+    unsigned int W = 512;
+    unsigned int C = 3;
+    // Step 1: clamp every value to [0,1]
+    std::vector<float> clipped(H * W * C);
+    for (int i = 0; i < H * W * C; ++i) {
+        clipped[i] = std::min(1.0f, std::max(0.0f, output[i]));
+    }
+    // Step 2: (H,W,C) to (C,H,W); the trailing extent of 1 pads the shape to
+    // the four axes transpose() expects
+    unsigned int src_dims1[4] = {H, W, C, 1};
+    unsigned int tsp_dims1[4] = {2, 0, 1, 3};
+    std::vector<float> chw(C * H * W);
+    transpose(clipped.data(), src_dims1, tsp_dims1, chw.data());
+    // Step 3: reverse the channel order (plane c is copied to plane 2 - c)
+    std::vector<float> chw_bgr(C * H * W);
+    for (int h = 0; h < H; ++h) {
+        for (int w = 0; w < W; ++w) {
+            for (int c = 0; c < C; ++c) {
+                int src_index = c * H * W + h * W + w;
+                int dst_c = 2 - c;
+                int dst_index = dst_c * H * W + h * W + w;
+                chw_bgr[dst_index] = chw[src_index];
+            }
+        }
+    }
+    // Step 4: (C,H,W) back to (H,W,C)
+    unsigned int src_dims2[4] = {C, H, W, 1};
+    unsigned int tsp_dims2[4] = {1, 2, 0, 3};
+    std::vector<float> hwc(H * W * C);
+    transpose(chw_bgr.data(), src_dims2, tsp_dims2, hwc.data());
+    // Step 5: scale [0,1] to [0,255], round, and store as a CV_8UC3 image
+    cv::Mat result(H, W, CV_8UC3);
+    for (int y = 0; y < H; ++y) {
+        for (int x = 0; x < W; ++x) {
+            int idx = (y * W + x) * C;
+            uchar b = static_cast<uchar>(std::round(hwc[idx + 0] * 255.0f));
+            uchar g = static_cast<uchar>(std::round(hwc[idx + 1] * 255.0f));
+            uchar r = static_cast<uchar>(std::round(hwc[idx + 2] * 255.0f));
+            result.at<cv::Vec3b>(y, x) = cv::Vec3b(b, g, r);
+        }
+    }
+    // Save the image (path is relative to the current working directory)
+    cv::imwrite("./result_img.jpg", result);
+}
+// Runs the whole demo: creates an AidLite interpreter for args.target_model,
+// feeds it args.imgs resized to 128x128, times args.invoke_nums inferences,
+// prints timing statistics and writes the result via
+// save_output_image_from_nhwc().
+// Returns 0 on success and a non-zero value on any failure.
+int invoke(const Args& args) {
+    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
+              << "Image Path: " << args.imgs << "\n"
+              << "Inference Nums: " << args.invoke_nums << "\n"
+              << "Model Type: " << args.model_type << "\n";
+    // At least one run is required: the statistics below dereference
+    // max_element/min_element and the post-processing reads the last output.
+    if (args.invoke_nums <= 0) {
+        printf("invoke_nums must be greater than 0 !\n");
+        return EXIT_FAILURE;
+    }
+    Model* model = Model::create_instance(args.target_model);
+    if(model == nullptr){
+        printf("Create model failed !\n");
+        return EXIT_FAILURE;
+    }
+    Config* config = Config::create_instance();
+    if(config == nullptr){
+        printf("Create config failed !\n");
+        return EXIT_FAILURE;
+    }
+    config->implement_type = ImplementType::TYPE_LOCAL;
+    std::string model_type_lower = to_lower(args.model_type);
+    if (model_type_lower == "qnn"){
+        config->framework_type = FrameworkType::TYPE_QNN;
+    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
+        config->framework_type = FrameworkType::TYPE_SNPE2;
+    } else {
+        // Do not continue with an unset framework type.
+        printf("Unsupported model type: %s\n", args.model_type.c_str());
+        return EXIT_FAILURE;
+    }
+    config->accelerate_type = AccelerateType::TYPE_DSP;
+    config->is_quantify_model = 1;
+    unsigned int model_h = 128;
+    unsigned int model_w = 128;
+    std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
+    std::vector<std::vector<uint32_t>> output_shapes = {{1,3,model_h*4,model_w*4}};
+    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
+    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
+    if(fast_interpreter == nullptr){
+        printf("build_interpretper_from_model_and_config failed !\n");
+        return EXIT_FAILURE;
+    }
+    int result = fast_interpreter->init();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->init() failed !\n");
+        return EXIT_FAILURE;
+    }
+    // load model; the return value must be stored, otherwise the check below
+    // only re-tests the result of init()
+    result = fast_interpreter->load_model();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->load_model() failed !\n");
+        return EXIT_FAILURE;
+    }
+    printf("detect model load success!\n");
+    cv::Mat frame = cv::imread(args.imgs);
+    if (frame.empty()) {
+        printf("detect image load failed!\n");
+        return 1;
+    }
+    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
+    // Pre-process: BGR -> RGB, resize to the model input size, scale to [0,1]
+    // as 32-bit floats.
+    cv::Mat input_data;
+    cv::Mat frame_clone = frame.clone();
+    cv::cvtColor(frame_clone, frame_clone, cv::COLOR_BGR2RGB);
+    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
+    frame_clone.convertTo(input_data, CV_32FC3, 1.0 / 255.0);
+    float *outdata0 = nullptr;
+    // Per-iteration invoke() latency in milliseconds.
+    std::vector<float> invoke_time;
+    for (int i = 0; i < args.invoke_nums; ++i) {
+        result = fast_interpreter->set_input_tensor(0, input_data.data);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->set_input_tensor() failed !\n");
+            return EXIT_FAILURE;
+        }
+        // Only invoke() itself is timed.
+        auto t1 = std::chrono::high_resolution_clock::now();
+        result = fast_interpreter->invoke();
+        auto t2 = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> cost_time = t2 - t1;
+        invoke_time.push_back(cost_time.count() * 1000);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->invoke() failed !\n");
+            return EXIT_FAILURE;
+        }
+        uint32_t out_data_0 = 0;
+        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->get_output_tensor() 1 failed !\n");
+            return EXIT_FAILURE;
+        }
+    }
+    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
+    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
+    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
+    // Population variance of the latencies.
+    float var_invoketime = 0.0f;
+    for (auto time : invoke_time) {
+        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
+    }
+    var_invoketime /= args.invoke_nums;
+    printf("=======================================\n");
+    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
+        args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
+    printf("=======================================\n");
+    //  post process (uses the output of the last iteration)
+    if (outdata0 == nullptr) {
+        printf("interpreter->get_output_tensor() returned no data !\n");
+        return EXIT_FAILURE;
+    }
+    save_output_image_from_nhwc(outdata0);
+    fast_interpreter->destory();
+    return 0;
+}
+// Entry point: parses the command line and uses the result of invoke() as the
+// process exit code.
+int main(int argc, char* argv[]) {
+    Args args = parse_args(argc, argv);
+    return invoke(args);
+}

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6e06342bfa355de053db65b7563b4b6417b5cba6c45aa0a4f09b4e5d795a4e3
+size 22131016

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/LR/baboon.png ADDED Viewed

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc ADDED Viewed

Binary file (3.2 kB). View file

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc ADDED Viewed

Binary file (3.22 kB). View file

model_farm_esrgan_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os.path as osp
+import glob
+import cv2
+import numpy as np
+import torch
+import time
+import aidlite
+import os
+class esrganQnn:
+    def __init__(self):
+        super().__init__()
+        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin"))
+        if self.model is None:
+            print("Create model failed !")
+            return
+        self.config = aidlite.Config.create_instance()
+        if self.config is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
+        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+        self.config.is_quantify_model = 1
+        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        input_shapes = [[1, 128, 128,3]]
+        # input_shapes = [[1,3, 128, 128]]
+        output_shapes = [[1, 3,128*4,128*4]]
+        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
+                                output_shapes, aidlite.DataType.TYPE_FLOAT32)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+        result = self.interpreter.init()
+        if result != 0:
+            print(f"interpreter init failed !")
+        result = self.interpreter.load_model()
+        if result != 0:
+            print("interpreter load model failed !")
+        print(" model load success!")
+    def __call__(self, input):
+        self.interpreter.set_input_tensor(0,input)
+        invoke_time=[]
+        invoke_nums =10
+        for i in range(invoke_nums):
+            result = self.interpreter.set_input_tensor(0, input.data)
+            if result != 0:
+                print("interpreter set_input_tensor() failed")
+            t1=time.time()
+            result = self.interpreter.invoke()
+            cost_time = (time.time()-t1)*1000
+            invoke_time.append(cost_time)
+        max_invoke_time = max(invoke_time)
+        min_invoke_time = min(invoke_time)
+        mean_invoke_time = sum(invoke_time)/invoke_nums
+        var_invoketime=np.var(invoke_time)
+        print("====================================")
+        print(f"QNN  invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
+        print("====================================")
+        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 128*4,128*4,3).copy()
+        return features_0
+def cosine_similarity(v1, v2):
+    v1 = v1.flatten()
+    v2 = v2.flatten()
+    # 计算点积
+    dot_product = np.dot(v1, v2)
+    # 计算每个向量的模长
+    norm_v1 = np.linalg.norm(v1)
+    norm_v2 = np.linalg.norm(v2)
+    # 防止除以零错误
+    norm_product = np.maximum(norm_v1 * norm_v2, 1e-8)
+    # 计算余弦相似度
+    return dot_product / norm_product
+esrgan_model= esrganQnn()
+test_img_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'LR/*')
+idx = 0
+for path in glob.glob(test_img_folder):
+    idx += 1
+    base = osp.splitext(osp.basename(path))[0]
+    print(idx, base)
+    # read images
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+    img = cv2.resize(img, (128,128))
+    img = img * 1.0 / 255
+    img = img[:, :, [2, 1, 0]]
+    img_LR = np.expand_dims(img,axis=0).astype(np.float32)
+    print("img_LR shape:",img_LR.shape)
+    t0 = time.time()
+    output = esrgan_model(img_LR)  #.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = np.clip(output[0], 0, 1)
+    output = np.transpose(output, (2, 0, 1))
+    use_time = round((time.time() - t0) * 1000, 2)
+    print(f"Inference_time:{use_time} ms")
+    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
+    output = (output * 255.0).round()
+    cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'{:s}_rlt_16qnn.png'.format(base)), output)
+print("ok")
+esrgan_model.interpreter.destory()

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+## Model Information
+### Source model
+- Input shape: 128x128
+- Number of parameters: 16.69M
+- Model size: 63.8MB
+- Output shape: 1x3x512x512
+Source model repository: [ESRGAN](https://github.com/xinntao/ESRGAN/)
+### Converted Model
+- Precision: FP16
+- Backend: QNN2.16
+- Target Device: SNM972 QCS8550
+## Inference with AidLite SDK
+### SDK installation
+Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+- Install AidLite SDK
+```bash
+# Install the appropriate version of the aidlite sdk
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the AidLite QNN package that matches the backend listed above. E.g., for QNN 2.23: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+- Verify AidLite SDK
+```bash
+# aidlite sdk c++ check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+# aidlite sdk python check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+### Run demo
+#### python
+```bash
+cd python
+python3 demo_qnn.py
+```
+#### c++
+```bash
+cd esrgan/model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp
+mkdir build && cd build
+cmake ..
+make
+./run_test
+```

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+cmake_minimum_required (VERSION 3.5)
+project("run_test")
+# OpenCV provides OpenCV_INCLUDE_DIRS / OpenCV_LIBS; Threads provides the
+# portable Threads::Threads imported target used instead of a raw "pthread".
+find_package(OpenCV REQUIRED)
+find_package(Threads REQUIRED)
+message(STATUS "oPENCV Library status:")
+message(STATUS ">version:${OpenCV_VERSION}")
+message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
+# aidlite is linked by name from /usr/local/lib. link_directories() is kept
+# because target_link_directories() needs CMake 3.13, newer than the declared
+# minimum; it must appear before the target is created to take effect.
+link_directories(
+    /usr/local/lib/
+)
+add_executable(run_test
+    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
+)
+# Target-scoped options: user-supplied CMAKE_CXX_FLAGS are no longer overwritten.
+target_compile_options(run_test PRIVATE
+    -Wno-error=deprecated-declarations
+    -Wno-deprecated-declarations
+)
+# Use the include path reported by find_package(OpenCV) rather than a
+# hard-coded /usr/include/opencv4.
+target_include_directories(run_test PRIVATE
+    /usr/local/include
+    ${OpenCV_INCLUDE_DIRS}
+)
+target_link_libraries(run_test PRIVATE
+    aidlite
+    ${OpenCV_LIBS}
+    Threads::Threads
+    jsoncpp
+)

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp/baboon.png ADDED Viewed

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp ADDED Viewed

	@@ -0,0 +1,243 @@

+#include <iostream>
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <aidlux/aidlite/aidlite.hpp>
+#include <vector>
+#include <numeric>
+#include <cmath>
+#include <jsoncpp/json/json.h>
+using namespace cv;
+using namespace std;
+using namespace Aidlux::Aidlite;
+// Command-line options of the demo; each initializer is the default used when
+// the corresponding flag is not given.
+struct Args {
+    // --target_model: path to the QNN context binary.
+    std::string target_model{"../../models/m_RRDB_esrgan_x4_fp16.qnn216.ctx.bin"};
+    // --imgs: path to the input image.
+    std::string imgs{"../baboon.png"};
+    // --invoke_nums: number of timed inference runs.
+    int invoke_nums{10};
+    // --model_type: framework selector, compared case-insensitively in invoke().
+    std::string model_type{"QNN"};
+};
+/**
+ * Parse "--flag value" pairs from the command line into an Args struct.
+ *
+ * Recognised flags: --target_model, --imgs, --invoke_nums, --model_type.
+ * Unknown arguments and flags without a following value are ignored, so the
+ * defaults from Args stay in effect for them.
+ *
+ * @param argc  argument count as passed to main().
+ * @param argv  argument vector as passed to main(); argv[0] is skipped.
+ * @return      Args with defaults overridden by the supplied flags.
+ */
+Args parse_args(int argc, char* argv[]) {
+    Args args;
+    for (int i = 1; i < argc; ++i) {
+        const std::string arg = argv[i];
+        const bool has_value = (i + 1 < argc);
+        if (arg == "--target_model" && has_value) {
+            args.target_model = argv[++i];
+        } else if (arg == "--imgs" && has_value) {
+            args.imgs = argv[++i];
+        } else if (arg == "--invoke_nums" && has_value) {
+            const char* text = argv[++i];
+            // std::stoi throws on non-numeric or out-of-range text; keep the
+            // current value instead of terminating with an uncaught exception.
+            try {
+                args.invoke_nums = std::stoi(text);
+            } catch (...) {
+                std::cerr << "Ignoring invalid --invoke_nums value '" << text
+                          << "', keeping " << args.invoke_nums << "\n";
+            }
+        } else if (arg == "--model_type" && has_value) {
+            args.model_type = argv[++i];
+        }
+    }
+    return args;
+}
+// Return a copy of `str` with every character passed through std::tolower.
+std::string to_lower(const std::string& str) {
+    std::string result(str);
+    for (char& ch : result) {
+        // Go through unsigned char so values above 127 are valid tolower input.
+        const unsigned char as_byte = static_cast<unsigned char>(ch);
+        ch = std::tolower(as_byte);
+    }
+    return result;
+}
+/**
+ * Permute the axes of a dense row-major 4-D float tensor.
+ *
+ * Output axis k takes source axis tsp_dims[k], i.e. the element at source
+ * coordinate (c0,c1,c2,c3) is written to destination coordinate
+ * (c[tsp_dims[0]], c[tsp_dims[1]], c[tsp_dims[2]], c[tsp_dims[3]]).
+ *
+ * @param src       source buffer holding src_dims[0]*...*src_dims[3] floats.
+ * @param src_dims  the four extents of the source tensor.
+ * @param tsp_dims  permutation of {0,1,2,3} describing the output axis order.
+ * @param dest      destination buffer of the same element count; must not alias src.
+ * @return          always EXIT_SUCCESS.
+ */
+int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
+    // Strides are loop invariant, so compute them once; std::size_t avoids the
+    // signed/unsigned mixing and int overflow of per-element int arithmetic.
+    const std::size_t src_stride[4] = {
+        static_cast<std::size_t>(src_dims[1]) * src_dims[2] * src_dims[3],
+        static_cast<std::size_t>(src_dims[2]) * src_dims[3],
+        static_cast<std::size_t>(src_dims[3]),
+        1
+    };
+    const std::size_t dst_dims[4] = {
+        src_dims[tsp_dims[0]], src_dims[tsp_dims[1]],
+        src_dims[tsp_dims[2]], src_dims[tsp_dims[3]]
+    };
+    const std::size_t dst_stride[4] = {
+        dst_dims[1] * dst_dims[2] * dst_dims[3],
+        dst_dims[2] * dst_dims[3],
+        dst_dims[3],
+        1
+    };
+    std::size_t coord[4] = {0, 0, 0, 0};
+    for(coord[0] = 0; coord[0] < src_dims[0]; ++coord[0]){
+        for(coord[1] = 0; coord[1] < src_dims[1]; ++coord[1]){
+            for(coord[2] = 0; coord[2] < src_dims[2]; ++coord[2]){
+                for(coord[3] = 0; coord[3] < src_dims[3]; ++coord[3]){
+                    const std::size_t old_index = coord[0]*src_stride[0] + coord[1]*src_stride[1] +
+                                                  coord[2]*src_stride[2] + coord[3];
+                    const std::size_t new_index = coord[tsp_dims[0]]*dst_stride[0] +
+                                                  coord[tsp_dims[1]]*dst_stride[1] +
+                                                  coord[tsp_dims[2]]*dst_stride[2] +
+                                                  coord[tsp_dims[3]];
+                    dest[new_index] = src[old_index];
+                }
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+/**
+ * Convert the model output to an 8-bit BGR image and write ./result_img.jpg.
+ *
+ * The buffer is read as 512 x 512 x 3 floats in H,W,C order with channels in
+ * R,G,B order; each value is clamped to [0,1], scaled by 255 and rounded.
+ * NOTE(review): invoke() declares the output shape as {1,3,512,512} while this
+ * function indexes it as HWC - confirm the actual layout of the tensor.
+ *
+ * @param output  pointer to at least 512*512*3 floats; nullptr is reported and ignored.
+ */
+void save_output_image_from_nhwc(float* output) {
+    const int H = 512;
+    const int W = 512;
+    const int C = 3;
+    if (output == nullptr) {
+        std::cerr << "save_output_image_from_nhwc: output buffer is null, nothing saved\n";
+        return;
+    }
+    cv::Mat result(H, W, CV_8UC3);
+    for (int y = 0; y < H; ++y) {
+        for (int x = 0; x < W; ++x) {
+            const float* px = output + (y * W + x) * C;
+            // Clamp to [0,1], then map to 0..255 with rounding.
+            const float r = std::min(1.0f, std::max(0.0f, px[0]));
+            const float g = std::min(1.0f, std::max(0.0f, px[1]));
+            const float b = std::min(1.0f, std::max(0.0f, px[2]));
+            // OpenCV stores pixels as B,G,R, so the R and B channels swap places.
+            result.at<cv::Vec3b>(y, x) = cv::Vec3b(
+                static_cast<uchar>(std::round(b * 255.0f)),
+                static_cast<uchar>(std::round(g * 255.0f)),
+                static_cast<uchar>(std::round(r * 255.0f)));
+        }
+    }
+    // Save the image
+    if (!cv::imwrite("./result_img.jpg", result)) {
+        std::cerr << "save_output_image_from_nhwc: failed to write ./result_img.jpg\n";
+    }
+}
+/**
+ * Run the whole demo: load the model, preprocess the image, run inference
+ * args.invoke_nums times while timing each run, print timing statistics and
+ * save the last output via save_output_image_from_nhwc().
+ *
+ * @param args  parsed command-line options.
+ * @return      0 on success, EXIT_FAILURE if any step fails.
+ */
+int invoke(const Args& args) {
+    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
+              << "Image Path: " << args.imgs << "\n"
+              << "Inference Nums: " << args.invoke_nums << "\n"
+              << "Model Type: " << args.model_type << "\n";
+    // The statistics below dereference min/max iterators and divide by
+    // invoke_nums, so at least one run is required.
+    if (args.invoke_nums <= 0) {
+        printf("invoke_nums must be greater than 0 !\n");
+        return EXIT_FAILURE;
+    }
+    Model* model = Model::create_instance(args.target_model);
+    if(model == nullptr){
+        printf("Create model failed !\n");
+        return EXIT_FAILURE;
+    }
+    Config* config = Config::create_instance();
+    if(config == nullptr){
+        printf("Create config failed !\n");
+        return EXIT_FAILURE;
+    }
+    config->implement_type = ImplementType::TYPE_LOCAL;
+    std::string model_type_lower = to_lower(args.model_type);
+    if (model_type_lower == "qnn"){
+        config->framework_type = FrameworkType::TYPE_QNN;
+    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
+        config->framework_type = FrameworkType::TYPE_SNPE2;
+    } else {
+        // Keep going as before, but make the unrecognised value visible.
+        printf("Unknown model_type '%s', framework_type left at the Config default\n", args.model_type.c_str());
+    }
+    config->accelerate_type = AccelerateType::TYPE_DSP;
+    // NOTE(review): this demo ships an fp16 context binary - confirm that
+    // is_quantify_model = 1 is the intended setting for it.
+    config->is_quantify_model = 1;
+    unsigned int model_h = 128;
+    unsigned int model_w = 128;
+    std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
+    std::vector<std::vector<uint32_t>> output_shapes = {{1,3,model_h*4,model_w*4}};
+    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
+    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
+    if(fast_interpreter == nullptr){
+        printf("build_interpretper_from_model_and_config failed !\n");
+        return EXIT_FAILURE;
+    }
+    int result = fast_interpreter->init();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->init() failed !\n");
+        return EXIT_FAILURE;
+    }
+    // load model; capture the status so the check below tests this call and
+    // not the stale result of init().
+    result = fast_interpreter->load_model();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->load_model() failed !\n");
+        return EXIT_FAILURE;
+    }
+    printf("detect model load success!\n");
+    cv::Mat frame = cv::imread(args.imgs);
+    if (frame.empty()) {
+        printf("detect image load failed!\n");
+        return EXIT_FAILURE;
+    }
+    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
+    // Preprocess: BGR -> RGB, resize to the model input size, scale to [0,1] floats.
+    cv::Mat input_data;
+    cv::Mat frame_clone = frame.clone();
+    cv::cvtColor(frame_clone, frame_clone, cv::COLOR_BGR2RGB);
+    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
+    frame_clone.convertTo(input_data, CV_32FC3, 1.0 / 255.0);
+    float *outdata0 = nullptr;
+    std::vector<float> invoke_time;
+    for (int i = 0; i < args.invoke_nums; ++i) {
+        result = fast_interpreter->set_input_tensor(0, input_data.data);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->set_input_tensor() failed !\n");
+            return EXIT_FAILURE;
+        }
+        // Only the invoke() call is timed; the value is stored in milliseconds.
+        auto t1 = std::chrono::high_resolution_clock::now();
+        result = fast_interpreter->invoke();
+        auto t2 = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> cost_time = t2 - t1;
+        invoke_time.push_back(cost_time.count() * 1000);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->invoke() failed !\n");
+            return EXIT_FAILURE;
+        }
+        uint32_t out_data_0 = 0;
+        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->get_output_tensor() 1 failed !\n");
+            return EXIT_FAILURE;
+        }
+    }
+    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
+    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
+    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
+    // Population variance of the per-run times.
+    float var_invoketime = 0.0f;
+    for (auto time : invoke_time) {
+        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
+    }
+    var_invoketime /= args.invoke_nums;
+    printf("=======================================\n");
+    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
+        args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
+    printf("=======================================\n");
+    //  post process
+    save_output_image_from_nhwc(outdata0);
+    fast_interpreter->destory();
+    return 0;
+}
+// Program entry point: turn the command line into Args and run the demo once.
+// The process exit code is whatever invoke() returns.
+int main(int argc, char* argv[]) {
+    const Args options = parse_args(argc, argv);
+    const int exit_code = invoke(options);
+    return exit_code;
+}

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/models/m_RRDB_esrgan_x4_fp16.qnn216.ctx.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc96c372fa7389448b8a06b9ce4d39d52f764d4017fbe44751dbf4f8b67a2205
+size 38752576

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/LR/baboon.png ADDED Viewed

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc ADDED Viewed

Binary file (3.2 kB). View file

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc ADDED Viewed

Binary file (3.22 kB). View file

model_farm_esrgan_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os.path as osp
+import glob
+import cv2
+import numpy as np
+import torch
+import time
+import aidlite
+import os
+class esrganQnn:
+    def __init__(self):
+        super().__init__()
+        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_RRDB_esrgan_x4_fp16.qnn216.ctx.bin"))
+        if self.model is None:
+            print("Create model failed !")
+            return
+        self.config = aidlite.Config.create_instance()
+        if self.config is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
+        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+        self.config.is_quantify_model = 1
+        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        input_shapes = [[1, 128, 128,3]]
+        # input_shapes = [[1,3, 128, 128]]
+        output_shapes = [[1, 3,128*4,128*4]]
+        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
+                                output_shapes, aidlite.DataType.TYPE_FLOAT32)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+        result = self.interpreter.init()
+        if result != 0:
+            print(f"interpreter init failed !")
+        result = self.interpreter.load_model()
+        if result != 0:
+            print("interpreter load model failed !")
+        print(" model load success!")
+    def __call__(self, input):
+        self.interpreter.set_input_tensor(0,input)
+        invoke_time=[]
+        invoke_nums =10
+        for i in range(invoke_nums):
+            result = self.interpreter.set_input_tensor(0, input.data)
+            if result != 0:
+                print("interpreter set_input_tensor() failed")
+            t1=time.time()
+            result = self.interpreter.invoke()
+            cost_time = (time.time()-t1)*1000
+            invoke_time.append(cost_time)
+        max_invoke_time = max(invoke_time)
+        min_invoke_time = min(invoke_time)
+        mean_invoke_time = sum(invoke_time)/invoke_nums
+        var_invoketime=np.var(invoke_time)
+        print("====================================")
+        print(f"QNN  invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
+        print("====================================")
+        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 128*4,128*4,3).copy()
+        return features_0
+def cosine_similarity(v1, v2):
+    v1 = v1.flatten()
+    v2 = v2.flatten()
+    # 计算点积
+    dot_product = np.dot(v1, v2)
+    # 计算每个向量的模长
+    norm_v1 = np.linalg.norm(v1)
+    norm_v2 = np.linalg.norm(v2)
+    # 防止除以零错误
+    norm_product = np.maximum(norm_v1 * norm_v2, 1e-8)
+    # 计算余弦相似度
+    return dot_product / norm_product
+esrgan_model= esrganQnn()
+test_img_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'LR/*')
+idx = 0
+for path in glob.glob(test_img_folder):
+    idx += 1
+    base = osp.splitext(osp.basename(path))[0]
+    print(idx, base)
+    # read images
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+    img = cv2.resize(img, (128,128))
+    img = img * 1.0 / 255
+    img = img[:, :, [2, 1, 0]]
+    img_LR = np.expand_dims(img,axis=0).astype(np.float32)
+    print("img_LR shape:",img_LR.shape)
+    t0 = time.time()
+    output = esrgan_model(img_LR)  #.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = np.clip(output[0], 0, 1)
+    output = np.transpose(output, (2, 0, 1))
+    use_time = round((time.time() - t0) * 1000, 2)
+    print(f"Inference_time:{use_time} ms")
+    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
+    output = (output * 255.0).round()
+    cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'{:s}_rlt_16qnn.png'.format(base)), output)
+print("ok")
+esrgan_model.interpreter.destory()

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+## Model Information
+### Source model
+- Input shape: 128x128
+- Number of parameters: 16.69M
+- Model size: 63.8MB
+- Output shape: 1x3x512x512
+Source model repository: [ESRGAN](https://github.com/xinntao/ESRGAN/)
+### Converted Model
+- Precision: INT8
+- Backend: QNN2.16
+- Target Device: SNM972 QCS8550
+## Inference with AidLite SDK
+### SDK installation
+Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+- Install AidLite SDK
+```bash
+# Install the appropriate version of the aidlite sdk
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the AidLite QNN package that matches the backend listed above. E.g., for QNN 2.23: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+- Verify AidLite SDK
+```bash
+# aidlite sdk c++ check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+# aidlite sdk python check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+### Run demo
+#### python
+```bash
+cd python
+python3 demo_qnn.py
+```
+#### c++
+```bash
+cd esrgan/model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp
+mkdir build && cd build
+cmake ..
+make
+./run_test
+```

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+cmake_minimum_required (VERSION 3.5)
+project("run_test")
+# OpenCV provides OpenCV_INCLUDE_DIRS / OpenCV_LIBS; Threads provides the
+# portable Threads::Threads imported target used instead of a raw "pthread".
+find_package(OpenCV REQUIRED)
+find_package(Threads REQUIRED)
+message(STATUS "oPENCV Library status:")
+message(STATUS ">version:${OpenCV_VERSION}")
+message(STATUS "Include:${OpenCV_INCLUDE_DIRS}")
+# aidlite is linked by name from /usr/local/lib. link_directories() is kept
+# because target_link_directories() needs CMake 3.13, newer than the declared
+# minimum; it must appear before the target is created to take effect.
+link_directories(
+    /usr/local/lib/
+)
+add_executable(run_test
+    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
+)
+# Target-scoped options: user-supplied CMAKE_CXX_FLAGS are no longer overwritten.
+target_compile_options(run_test PRIVATE
+    -Wno-error=deprecated-declarations
+    -Wno-deprecated-declarations
+)
+# Use the include path reported by find_package(OpenCV) rather than a
+# hard-coded /usr/include/opencv4.
+target_include_directories(run_test PRIVATE
+    /usr/local/include
+    ${OpenCV_INCLUDE_DIRS}
+)
+target_link_libraries(run_test PRIVATE
+    aidlite
+    ${OpenCV_LIBS}
+    Threads::Threads
+    jsoncpp
+)

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp/baboon.png ADDED Viewed

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED Viewed

	@@ -0,0 +1,243 @@

+#include <iostream>
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <aidlux/aidlite/aidlite.hpp>
+#include <vector>
+#include <numeric>
+#include <cmath>
+#include <jsoncpp/json/json.h>
+using namespace cv;
+using namespace std;
+using namespace Aidlux::Aidlite;
+// Command-line options of the demo; each initializer is the default used when
+// the corresponding flag is not given.
+struct Args {
+    // --target_model: path to the QNN context binary.
+    std::string target_model{"../../models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin"};
+    // --imgs: path to the input image.
+    std::string imgs{"../baboon.png"};
+    // --invoke_nums: number of timed inference runs.
+    int invoke_nums{10};
+    // --model_type: framework selector, compared case-insensitively in invoke().
+    std::string model_type{"QNN"};
+};
+/**
+ * Parse "--flag value" pairs from the command line into an Args struct.
+ *
+ * Recognised flags: --target_model, --imgs, --invoke_nums, --model_type.
+ * Unknown arguments and flags without a following value are ignored, so the
+ * defaults from Args stay in effect for them.
+ *
+ * @param argc  argument count as passed to main().
+ * @param argv  argument vector as passed to main(); argv[0] is skipped.
+ * @return      Args with defaults overridden by the supplied flags.
+ */
+Args parse_args(int argc, char* argv[]) {
+    Args args;
+    for (int i = 1; i < argc; ++i) {
+        const std::string arg = argv[i];
+        const bool has_value = (i + 1 < argc);
+        if (arg == "--target_model" && has_value) {
+            args.target_model = argv[++i];
+        } else if (arg == "--imgs" && has_value) {
+            args.imgs = argv[++i];
+        } else if (arg == "--invoke_nums" && has_value) {
+            const char* text = argv[++i];
+            // std::stoi throws on non-numeric or out-of-range text; keep the
+            // current value instead of terminating with an uncaught exception.
+            try {
+                args.invoke_nums = std::stoi(text);
+            } catch (...) {
+                std::cerr << "Ignoring invalid --invoke_nums value '" << text
+                          << "', keeping " << args.invoke_nums << "\n";
+            }
+        } else if (arg == "--model_type" && has_value) {
+            args.model_type = argv[++i];
+        }
+    }
+    return args;
+}
+// Return a copy of `str` with every character passed through std::tolower.
+std::string to_lower(const std::string& str) {
+    std::string result(str);
+    for (char& ch : result) {
+        // Go through unsigned char so values above 127 are valid tolower input.
+        const unsigned char as_byte = static_cast<unsigned char>(ch);
+        ch = std::tolower(as_byte);
+    }
+    return result;
+}
+/**
+ * Permute the axes of a dense row-major 4-D float tensor.
+ *
+ * Output axis k takes source axis tsp_dims[k], i.e. the element at source
+ * coordinate (c0,c1,c2,c3) is written to destination coordinate
+ * (c[tsp_dims[0]], c[tsp_dims[1]], c[tsp_dims[2]], c[tsp_dims[3]]).
+ *
+ * @param src       source buffer holding src_dims[0]*...*src_dims[3] floats.
+ * @param src_dims  the four extents of the source tensor.
+ * @param tsp_dims  permutation of {0,1,2,3} describing the output axis order.
+ * @param dest      destination buffer of the same element count; must not alias src.
+ * @return          always EXIT_SUCCESS.
+ */
+int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
+    // Strides are loop invariant, so compute them once; std::size_t avoids the
+    // signed/unsigned mixing and int overflow of per-element int arithmetic.
+    const std::size_t src_stride[4] = {
+        static_cast<std::size_t>(src_dims[1]) * src_dims[2] * src_dims[3],
+        static_cast<std::size_t>(src_dims[2]) * src_dims[3],
+        static_cast<std::size_t>(src_dims[3]),
+        1
+    };
+    const std::size_t dst_dims[4] = {
+        src_dims[tsp_dims[0]], src_dims[tsp_dims[1]],
+        src_dims[tsp_dims[2]], src_dims[tsp_dims[3]]
+    };
+    const std::size_t dst_stride[4] = {
+        dst_dims[1] * dst_dims[2] * dst_dims[3],
+        dst_dims[2] * dst_dims[3],
+        dst_dims[3],
+        1
+    };
+    std::size_t coord[4] = {0, 0, 0, 0};
+    for(coord[0] = 0; coord[0] < src_dims[0]; ++coord[0]){
+        for(coord[1] = 0; coord[1] < src_dims[1]; ++coord[1]){
+            for(coord[2] = 0; coord[2] < src_dims[2]; ++coord[2]){
+                for(coord[3] = 0; coord[3] < src_dims[3]; ++coord[3]){
+                    const std::size_t old_index = coord[0]*src_stride[0] + coord[1]*src_stride[1] +
+                                                  coord[2]*src_stride[2] + coord[3];
+                    const std::size_t new_index = coord[tsp_dims[0]]*dst_stride[0] +
+                                                  coord[tsp_dims[1]]*dst_stride[1] +
+                                                  coord[tsp_dims[2]]*dst_stride[2] +
+                                                  coord[tsp_dims[3]];
+                    dest[new_index] = src[old_index];
+                }
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+/**
+ * Convert the model output to an 8-bit BGR image and write ./result_img.jpg.
+ *
+ * The buffer is read as 512 x 512 x 3 floats in H,W,C order with channels in
+ * R,G,B order; each value is clamped to [0,1], scaled by 255 and rounded.
+ * NOTE(review): invoke() declares the output shape as {1,3,512,512} while this
+ * function indexes it as HWC - confirm the actual layout of the tensor.
+ *
+ * @param output  pointer to at least 512*512*3 floats; nullptr is reported and ignored.
+ */
+void save_output_image_from_nhwc(float* output) {
+    const int H = 512;
+    const int W = 512;
+    const int C = 3;
+    if (output == nullptr) {
+        std::cerr << "save_output_image_from_nhwc: output buffer is null, nothing saved\n";
+        return;
+    }
+    cv::Mat result(H, W, CV_8UC3);
+    for (int y = 0; y < H; ++y) {
+        for (int x = 0; x < W; ++x) {
+            const float* px = output + (y * W + x) * C;
+            // Clamp to [0,1], then map to 0..255 with rounding.
+            const float r = std::min(1.0f, std::max(0.0f, px[0]));
+            const float g = std::min(1.0f, std::max(0.0f, px[1]));
+            const float b = std::min(1.0f, std::max(0.0f, px[2]));
+            // OpenCV stores pixels as B,G,R, so the R and B channels swap places.
+            result.at<cv::Vec3b>(y, x) = cv::Vec3b(
+                static_cast<uchar>(std::round(b * 255.0f)),
+                static_cast<uchar>(std::round(g * 255.0f)),
+                static_cast<uchar>(std::round(r * 255.0f)));
+        }
+    }
+    // Save the image
+    if (!cv::imwrite("./result_img.jpg", result)) {
+        std::cerr << "save_output_image_from_nhwc: failed to write ./result_img.jpg\n";
+    }
+}
+/**
+ * Run the whole demo: load the model, preprocess the image, run inference
+ * args.invoke_nums times while timing each run, print timing statistics and
+ * save the last output via save_output_image_from_nhwc().
+ *
+ * @param args  parsed command-line options.
+ * @return      0 on success, EXIT_FAILURE if any step fails.
+ */
+int invoke(const Args& args) {
+    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
+              << "Image Path: " << args.imgs << "\n"
+              << "Inference Nums: " << args.invoke_nums << "\n"
+              << "Model Type: " << args.model_type << "\n";
+    // The statistics below dereference min/max iterators and divide by
+    // invoke_nums, so at least one run is required.
+    if (args.invoke_nums <= 0) {
+        printf("invoke_nums must be greater than 0 !\n");
+        return EXIT_FAILURE;
+    }
+    Model* model = Model::create_instance(args.target_model);
+    if(model == nullptr){
+        printf("Create model failed !\n");
+        return EXIT_FAILURE;
+    }
+    Config* config = Config::create_instance();
+    if(config == nullptr){
+        printf("Create config failed !\n");
+        return EXIT_FAILURE;
+    }
+    config->implement_type = ImplementType::TYPE_LOCAL;
+    std::string model_type_lower = to_lower(args.model_type);
+    if (model_type_lower == "qnn"){
+        config->framework_type = FrameworkType::TYPE_QNN;
+    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
+        config->framework_type = FrameworkType::TYPE_SNPE2;
+    } else {
+        // Keep going as before, but make the unrecognised value visible.
+        printf("Unknown model_type '%s', framework_type left at the Config default\n", args.model_type.c_str());
+    }
+    config->accelerate_type = AccelerateType::TYPE_DSP;
+    config->is_quantify_model = 1;
+    unsigned int model_h = 128;
+    unsigned int model_w = 128;
+    std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
+    std::vector<std::vector<uint32_t>> output_shapes = {{1,3,model_h*4,model_w*4}};
+    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
+    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
+    if(fast_interpreter == nullptr){
+        printf("build_interpretper_from_model_and_config failed !\n");
+        return EXIT_FAILURE;
+    }
+    int result = fast_interpreter->init();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->init() failed !\n");
+        return EXIT_FAILURE;
+    }
+    // load model; capture the status so the check below tests this call and
+    // not the stale result of init().
+    result = fast_interpreter->load_model();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->load_model() failed !\n");
+        return EXIT_FAILURE;
+    }
+    printf("detect model load success!\n");
+    cv::Mat frame = cv::imread(args.imgs);
+    if (frame.empty()) {
+        printf("detect image load failed!\n");
+        return EXIT_FAILURE;
+    }
+    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
+    // Preprocess: BGR -> RGB, resize to the model input size, scale to [0,1] floats.
+    cv::Mat input_data;
+    cv::Mat frame_clone = frame.clone();
+    cv::cvtColor(frame_clone, frame_clone, cv::COLOR_BGR2RGB);
+    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
+    frame_clone.convertTo(input_data, CV_32FC3, 1.0 / 255.0);
+    float *outdata0 = nullptr;
+    std::vector<float> invoke_time;
+    for (int i = 0; i < args.invoke_nums; ++i) {
+        result = fast_interpreter->set_input_tensor(0, input_data.data);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->set_input_tensor() failed !\n");
+            return EXIT_FAILURE;
+        }
+        // Only the invoke() call is timed; the value is stored in milliseconds.
+        auto t1 = std::chrono::high_resolution_clock::now();
+        result = fast_interpreter->invoke();
+        auto t2 = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> cost_time = t2 - t1;
+        invoke_time.push_back(cost_time.count() * 1000);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->invoke() failed !\n");
+            return EXIT_FAILURE;
+        }
+        uint32_t out_data_0 = 0;
+        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->get_output_tensor() 1 failed !\n");
+            return EXIT_FAILURE;
+        }
+    }
+    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
+    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
+    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
+    // Population variance of the per-run times.
+    float var_invoketime = 0.0f;
+    for (auto time : invoke_time) {
+        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
+    }
+    var_invoketime /= args.invoke_nums;
+    printf("=======================================\n");
+    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
+        args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
+    printf("=======================================\n");
+    //  post process
+    save_output_image_from_nhwc(outdata0);
+    fast_interpreter->destory();
+    return 0;
+}
+// Program entry point: turns the command line into an Args value and hands it
+// to invoke(); whatever invoke() returns becomes the process exit status.
+int main(int argc, char* argv[]) {
+    const Args cli_options = parse_args(argc, argv);
+    const int exit_status = invoke(cli_options);
+    return exit_status;
+}

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8e3ecf6ce818649cbb568d1cc79452d222a43b049847e0e7fc9496033a6fa81
+size 22618440

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/LR/baboon.png ADDED Viewed

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc ADDED Viewed

Binary file (3.2 kB). View file

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc ADDED Viewed

Binary file (3.22 kB). View file

model_farm_esrgan_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os.path as osp
+import glob
+import cv2
+import numpy as np
+import torch
+import time
+import aidlite
+import os
+class esrganQnn:
+    def __init__(self):
+        super().__init__()
+        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_RRDB_esrgan_x4_w8a8.qnn216.ctx.bin"))
+        if self.model is None:
+            print("Create model failed !")
+            return
+        self.config = aidlite.Config.create_instance()
+        if self.config is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
+        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+        self.config.is_quantify_model = 1
+        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        input_shapes = [[1, 128, 128,3]]
+        # input_shapes = [[1,3, 128, 128]]
+        output_shapes = [[1, 3,128*4,128*4]]
+        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
+                                output_shapes, aidlite.DataType.TYPE_FLOAT32)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+        result = self.interpreter.init()
+        if result != 0:
+            print(f"interpreter init failed !")
+        result = self.interpreter.load_model()
+        if result != 0:
+            print("interpreter load model failed !")
+        print(" model load success!")
+    def __call__(self, input):
+        self.interpreter.set_input_tensor(0,input)
+        invoke_time=[]
+        invoke_nums =10
+        for i in range(invoke_nums):
+            result = self.interpreter.set_input_tensor(0, input.data)
+            if result != 0:
+                print("interpreter set_input_tensor() failed")
+            t1=time.time()
+            result = self.interpreter.invoke()
+            cost_time = (time.time()-t1)*1000
+            invoke_time.append(cost_time)
+        max_invoke_time = max(invoke_time)
+        min_invoke_time = min(invoke_time)
+        mean_invoke_time = sum(invoke_time)/invoke_nums
+        var_invoketime=np.var(invoke_time)
+        print("====================================")
+        print(f"QNN  invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
+        print("====================================")
+        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 128*4,128*4,3).copy()
+        return features_0
+def cosine_similarity(v1, v2):
+    v1 = v1.flatten()
+    v2 = v2.flatten()
+    # 计算点积
+    dot_product = np.dot(v1, v2)
+    # 计算每个向量的模长
+    norm_v1 = np.linalg.norm(v1)
+    norm_v2 = np.linalg.norm(v2)
+    # 防止除以零错误
+    norm_product = np.maximum(norm_v1 * norm_v2, 1e-8)
+    # 计算余弦相似度
+    return dot_product / norm_product
+esrgan_model= esrganQnn()
+test_img_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'LR/*')
+idx = 0
+for path in glob.glob(test_img_folder):
+    idx += 1
+    base = osp.splitext(osp.basename(path))[0]
+    print(idx, base)
+    # read images
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+    img = cv2.resize(img, (128,128))
+    img = img * 1.0 / 255
+    img = img[:, :, [2, 1, 0]]
+    img_LR = np.expand_dims(img,axis=0).astype(np.float32)
+    print("img_LR shape:",img_LR.shape)
+    t0 = time.time()
+    output = esrgan_model(img_LR)  #.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = np.clip(output[0], 0, 1)
+    output = np.transpose(output, (2, 0, 1))
+    use_time = round((time.time() - t0) * 1000, 2)
+    print(f"Inference_time:{use_time} ms")
+    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
+    output = (output * 255.0).round()
+    cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'{:s}_rlt_16qnn.png'.format(base)), output)
+print("ok")
+esrgan_model.interpreter.destory()

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+## Model Information
+### Source model
+- Input shape: 128x128
+- Number of parameters: 16.69M
+- Model size: 63.8MB
+- Output shape: 1x3x512x512
+- Source model repository: [ESRGAN](https://github.com/xinntao/ESRGAN/)
+### Converted Model
+- Precision: W8A16
+- Backend: QNN2.16
+- Target Device: SNM972 QCS8550
+## Inference with AidLite SDK
+### SDK installation
+Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+- install AidLite SDK
+```bash
+# Install the appropriate version of the aidlite sdk
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the QNN backend package that matches the backend listed above, e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+- Verify AidLite SDK
+```bash
+# aidlite sdk c++ check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+# aidlite sdk python check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+### Run demo
+#### python
+```bash
+cd python
+python3 demo_qnn.py
+```
+#### c++
+```bash
+cd esrgan/model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp
+mkdir build && cd build
+cmake ..
+make
+./run_test
+```

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# Builds the run_test demo executable against OpenCV, AidLite and jsoncpp.
+cmake_minimum_required(VERSION 3.5...3.28)
+project(run_test LANGUAGES CXX)
+# OpenCV is located through its package config; the status lines help debug
+# which installation was picked up.
+find_package(OpenCV REQUIRED)
+message(STATUS "OpenCV library status:")
+message(STATUS "  version: ${OpenCV_VERSION}")
+message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")
+# Provides the Threads::Threads imported target used instead of a raw "pthread".
+find_package(Threads REQUIRED)
+# aidlite and jsoncpp are linked by bare name, so the linker needs this search
+# path. It must precede add_executable(); target_link_directories() would
+# require CMake >= 3.13.
+link_directories(/usr/local/lib)
+add_executable(run_test
+    ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
+)
+# /usr/local/include is kept for the <aidlux/...> headers; the OpenCV headers
+# come from the discovered package rather than a hard-coded /usr/include/opencv4.
+target_include_directories(run_test PRIVATE
+    /usr/local/include
+    ${OpenCV_INCLUDE_DIRS}
+)
+# Scoped to the target so user-supplied CMAKE_CXX_FLAGS are no longer overwritten.
+target_compile_options(run_test PRIVATE
+    -Wno-error=deprecated-declarations
+    -Wno-deprecated-declarations
+)
+target_link_libraries(run_test PRIVATE
+    aidlite
+    ${OpenCV_LIBS}
+    Threads::Threads
+    jsoncpp
+)

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp/baboon.png ADDED Viewed

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp ADDED Viewed

	@@ -0,0 +1,243 @@

+#include <algorithm>
+#include <cctype>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+#include <opencv2/opencv.hpp>
+#include <aidlux/aidlite/aidlite.hpp>
+#include <jsoncpp/json/json.h>
+using namespace cv;
+using namespace std;
+using namespace Aidlux::Aidlite;
+// Command-line options of the demo. Each initializer is the value used when
+// the corresponding flag is not supplied.
+struct Args {
+    // --target_model: path of the model file to load.
+    std::string target_model{"../../models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin"};
+    // --imgs: path of the input image.
+    std::string imgs{"../baboon.png"};
+    // --invoke_nums: how many times inference is repeated for timing.
+    int invoke_nums{10};
+    // --model_type: framework selector string.
+    std::string model_type{"QNN"};
+};
+// Parses "--flag value" pairs from the command line into an Args value.
+//
+// Recognised flags: --target_model, --imgs, --invoke_nums, --model_type.
+// Unknown arguments, and a recognised flag with no value after it, are ignored,
+// leaving the corresponding default in place.
+// A --invoke_nums value that is not a positive integer is reported on stderr
+// and the default is kept; previously std::stoi's exception escaped and
+// terminated the program.
+Args parse_args(int argc, char* argv[]) {
+    Args args;
+    for (int i = 1; i < argc; ++i) {
+        const std::string arg = argv[i];
+        // A flag only counts when another argument follows to serve as its value.
+        const bool has_value = (i + 1 < argc);
+        if (arg == "--target_model" && has_value) {
+            args.target_model = argv[++i];
+        } else if (arg == "--imgs" && has_value) {
+            args.imgs = argv[++i];
+        } else if (arg == "--invoke_nums" && has_value) {
+            const std::string value = argv[++i];
+            try {
+                const int parsed = std::stoi(value);
+                if (parsed > 0) {
+                    args.invoke_nums = parsed;
+                } else {
+                    std::cerr << "--invoke_nums must be greater than 0, got \"" << value
+                              << "\"; keeping " << args.invoke_nums << "\n";
+                }
+            } catch (const std::exception&) {
+                // std::stoi throws std::invalid_argument / std::out_of_range.
+                std::cerr << "--invoke_nums expects an integer, got \"" << value
+                          << "\"; keeping " << args.invoke_nums << "\n";
+            }
+        } else if (arg == "--model_type" && has_value) {
+            args.model_type = argv[++i];
+        }
+    }
+    return args;
+}
+// Returns a copy of `str` in which every character has been passed through
+// std::tolower.
+std::string to_lower(const std::string& str) {
+    std::string lowered;
+    lowered.reserve(str.size());
+    // Iterate as unsigned char so std::tolower never receives a negative value.
+    for (unsigned char ch : str) {
+        lowered.push_back(static_cast<char>(std::tolower(ch)));
+    }
+    return lowered;
+}
+// Permutes the axes of a 4-D row-major float array.
+//   src      : source data with extents src_dims[0..3]
+//   tsp_dims : permutation; destination axis k takes source axis tsp_dims[k]
+//   dest     : destination buffer with as many elements as src
+// Always returns EXIT_SUCCESS.
+int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){
+    // Row-major strides of the source layout.
+    const unsigned int src_stride[4] = {
+        src_dims[1] * src_dims[2] * src_dims[3],
+        src_dims[2] * src_dims[3],
+        src_dims[3],
+        1u
+    };
+    // Row-major strides of the destination layout, whose axis k has extent
+    // src_dims[tsp_dims[k]].
+    const unsigned int dst_stride[4] = {
+        src_dims[tsp_dims[1]] * src_dims[tsp_dims[2]] * src_dims[tsp_dims[3]],
+        src_dims[tsp_dims[2]] * src_dims[tsp_dims[3]],
+        src_dims[tsp_dims[3]],
+        1u
+    };
+    unsigned int pos[4] = {0u, 0u, 0u, 0u};
+    for (pos[0] = 0; pos[0] < src_dims[0]; ++pos[0]) {
+        for (pos[1] = 0; pos[1] < src_dims[1]; ++pos[1]) {
+            for (pos[2] = 0; pos[2] < src_dims[2]; ++pos[2]) {
+                for (pos[3] = 0; pos[3] < src_dims[3]; ++pos[3]) {
+                    unsigned int from = 0;
+                    unsigned int to = 0;
+                    for (int axis = 0; axis < 4; ++axis) {
+                        from += pos[axis] * src_stride[axis];
+                        to += pos[tsp_dims[axis]] * dst_stride[axis];
+                    }
+                    dest[to] = src[from];
+                }
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+// Converts the network output to an 8-bit image and writes ./result_img.jpg.
+//
+// `output` is read as 512*512*3 floats with the three channel values of a
+// pixel stored next to each other (H, W, C order). Each value is clipped to
+// [0, 1] and scaled to 0..255. The channel order is reversed per pixel
+// (source channel 2 is stored first), which the original comments describe as
+// RGB -> BGR.
+//
+// This yields the same pixels as the former clip -> HWC->CHW -> channel swap
+// -> CHW->HWC pipeline, without its four temporary buffers.
+// A null `output` is reported and nothing is written (it used to be
+// dereferenced); a failed write is reported as well.
+void save_output_image_from_nhwc(float* output) {
+    if (output == nullptr) {
+        printf("save_output_image_from_nhwc(): output buffer is null, nothing saved !\n");
+        return;
+    }
+    const int H = 512;
+    const int W = 512;
+    const int C = 3;
+    // Clip one sample to [0, 1], then round it onto the 0..255 range.
+    auto to_u8 = [](float value) -> uchar {
+        const float clipped = std::min(1.0f, std::max(0.0f, value));
+        return static_cast<uchar>(std::round(clipped * 255.0f));
+    };
+    cv::Mat result(H, W, CV_8UC3);
+    for (int y = 0; y < H; ++y) {
+        for (int x = 0; x < W; ++x) {
+            const float* px = output + (static_cast<size_t>(y) * W + x) * C;
+            // Reverse the channel order while copying.
+            result.at<cv::Vec3b>(y, x) = cv::Vec3b(to_u8(px[2]), to_u8(px[1]), to_u8(px[0]));
+        }
+    }
+    // Save the image
+    if (!cv::imwrite("./result_img.jpg", result)) {
+        printf("save ./result_img.jpg failed !\n");
+    }
+}
+// Runs the complete demo: builds the AidLite interpreter for args.target_model,
+// preprocesses args.imgs, runs inference args.invoke_nums times while timing
+// each run, prints the timing statistics (milliseconds) and saves the last
+// output as an image.
+//
+// Returns 0 on success and EXIT_FAILURE (or 1 when the image cannot be read)
+// as soon as any step fails.
+//
+// Fixes over the previous version:
+//  * the status of load_model() is now captured; the check after it used to
+//    re-test the stale result of init();
+//  * a non-positive invoke_nums is rejected up front - it used to dereference
+//    max_element() of an empty vector and pass a null output pointer on;
+//  * an unrecognised model_type is reported instead of being silently ignored.
+int invoke(const Args& args) {
+    std::cout << "Start main ... ... Model Path: " << args.target_model << "\n"
+              << "Image Path: " << args.imgs << "\n"
+              << "Inference Nums: " << args.invoke_nums << "\n"
+              << "Model Type: " << args.model_type << "\n";
+    if (args.invoke_nums <= 0) {
+        printf("invoke_nums must be greater than 0 !\n");
+        return EXIT_FAILURE;
+    }
+    Model* model = Model::create_instance(args.target_model);
+    if(model == nullptr){
+        printf("Create model failed !\n");
+        return EXIT_FAILURE;
+    }
+    Config* config = Config::create_instance();
+    if(config == nullptr){
+        printf("Create config failed !\n");
+        return EXIT_FAILURE;
+    }
+    config->implement_type = ImplementType::TYPE_LOCAL;
+    std::string model_type_lower = to_lower(args.model_type);
+    if (model_type_lower == "qnn"){
+        config->framework_type = FrameworkType::TYPE_QNN;
+    } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") {
+        config->framework_type = FrameworkType::TYPE_SNPE2;
+    } else {
+        // framework_type keeps whatever value Config::create_instance() gave it.
+        printf("Unknown model type \"%s\", framework_type left unchanged !\n", args.model_type.c_str());
+    }
+    config->accelerate_type = AccelerateType::TYPE_DSP;
+    config->is_quantify_model = 1;
+    unsigned int model_h = 128;
+    unsigned int model_w = 128;
+    std::vector<std::vector<uint32_t>> input_shapes = {{1,model_h,model_w,3}};
+    // NOTE(review): the output shape is declared channel-first here, while
+    // save_output_image_from_nhwc() reads the buffer as H,W,C - confirm the layout.
+    std::vector<std::vector<uint32_t>> output_shapes = {{1,3,model_h*4,model_w*4}};
+    model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
+    std::unique_ptr<Interpreter> fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
+    if(fast_interpreter == nullptr){
+        printf("build_interpretper_from_model_and_config failed !\n");
+        return EXIT_FAILURE;
+    }
+    int result = fast_interpreter->init();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->init() failed !\n");
+        return EXIT_FAILURE;
+    }
+    // load model
+    result = fast_interpreter->load_model();
+    if(result != EXIT_SUCCESS){
+        printf("interpreter->load_model() failed !\n");
+        return EXIT_FAILURE;
+    }
+    printf("detect model load success!\n");
+    cv::Mat frame = cv::imread(args.imgs);
+    if (frame.empty()) {
+        printf("detect image load failed!\n");
+        return 1;
+    }
+    printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
+    // Preprocess: BGR -> RGB, resize to the model input size, scale to [0, 1] float.
+    cv::Mat input_data;
+    cv::Mat frame_clone = frame.clone();
+    cv::cvtColor(frame_clone, frame_clone, cv::COLOR_BGR2RGB);
+    cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h));
+    frame_clone.convertTo(input_data, CV_32FC3, 1.0 / 255.0);
+    float *outdata0 = nullptr;
+    std::vector<float> invoke_time;
+    for (int i = 0; i < args.invoke_nums; ++i) {
+        result = fast_interpreter->set_input_tensor(0, input_data.data);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->set_input_tensor() failed !\n");
+            return EXIT_FAILURE;
+        }
+        // Only the invoke() call itself is timed.
+        auto t1 = std::chrono::high_resolution_clock::now();
+        result = fast_interpreter->invoke();
+        auto t2 = std::chrono::high_resolution_clock::now();
+        std::chrono::duration<double> cost_time = t2 - t1;
+        invoke_time.push_back(cost_time.count() * 1000);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->invoke() failed !\n");
+            return EXIT_FAILURE;
+        }
+        uint32_t out_data_0 = 0;
+        result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0);
+        if(result != EXIT_SUCCESS){
+            printf("interpreter->get_output_tensor() 1 failed !\n");
+            return EXIT_FAILURE;
+        }
+    }
+    // invoke_time holds invoke_nums (> 0) samples here, so the iterators below are valid.
+    float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end());
+    float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end());
+    float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums;
+    // Population variance: mean squared deviation from the mean.
+    float var_invoketime = 0.0f;
+    for (auto time : invoke_time) {
+        var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time);
+    }
+    var_invoketime /= args.invoke_nums;
+    printf("=======================================\n");
+    printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n",
+        args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime);
+    printf("=======================================\n");
+    //  post process
+    save_output_image_from_nhwc(outdata0);
+    fast_interpreter->destory();
+    return 0;
+}
+// Program entry point: turns the command line into an Args value and hands it
+// to invoke(); whatever invoke() returns becomes the process exit status.
+int main(int argc, char* argv[]) {
+    const Args cli_options = parse_args(argc, argv);
+    const int exit_status = invoke(cli_options);
+    return exit_status;
+}

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ef9af1175914c55d210c01fdaa8d3acaf2d6f117b370ccc3f7d7b2351c16e3f
+size 24006984

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/LR/baboon.png ADDED Viewed

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/__pycache__/RRDBNet_arch.cpython-38.pyc ADDED Viewed

Binary file (3.2 kB). View file

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/__pycache__/RRDBNet_arch.cpython-39.pyc ADDED Viewed

Binary file (3.22 kB). View file

model_farm_esrgan_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os.path as osp
+import glob
+import cv2
+import numpy as np
+import torch
+import time
+import aidlite
+import os
+class esrganQnn:
+    def __init__(self):
+        super().__init__()
+        self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_RRDB_esrgan_x4_w8a16.qnn216.ctx.bin"))
+        if self.model is None:
+            print("Create model failed !")
+            return
+        self.config = aidlite.Config.create_instance()
+        if self.config is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+        self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
+        self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+        self.config.is_quantify_model = 1
+        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+            return
+        input_shapes = [[1, 128, 128,3]]
+        # input_shapes = [[1,3, 128, 128]]
+        output_shapes = [[1, 3,128*4,128*4]]
+        self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
+                                output_shapes, aidlite.DataType.TYPE_FLOAT32)
+        if self.interpreter is None:
+            print("build_interpretper_from_model_and_config failed !")
+        result = self.interpreter.init()
+        if result != 0:
+            print(f"interpreter init failed !")
+        result = self.interpreter.load_model()
+        if result != 0:
+            print("interpreter load model failed !")
+        print(" model load success!")
+    def __call__(self, input):
+        self.interpreter.set_input_tensor(0,input)
+        invoke_time=[]
+        invoke_nums =10
+        for i in range(invoke_nums):
+            result = self.interpreter.set_input_tensor(0, input.data)
+            if result != 0:
+                print("interpreter set_input_tensor() failed")
+            t1=time.time()
+            result = self.interpreter.invoke()
+            cost_time = (time.time()-t1)*1000
+            invoke_time.append(cost_time)
+        max_invoke_time = max(invoke_time)
+        min_invoke_time = min(invoke_time)
+        mean_invoke_time = sum(invoke_time)/invoke_nums
+        var_invoketime=np.var(invoke_time)
+        print("====================================")
+        print(f"QNN  invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
+        print("====================================")
+        features_0 = self.interpreter.get_output_tensor(0).reshape(1, 128*4,128*4,3).copy()
+        return features_0
+def cosine_similarity(v1, v2):
+    v1 = v1.flatten()
+    v2 = v2.flatten()
+    # 计算点积
+    dot_product = np.dot(v1, v2)
+    # 计算每个向量的模长
+    norm_v1 = np.linalg.norm(v1)
+    norm_v2 = np.linalg.norm(v2)
+    # 防止除以零错误
+    norm_product = np.maximum(norm_v1 * norm_v2, 1e-8)
+    # 计算余弦相似度
+    return dot_product / norm_product
+esrgan_model= esrganQnn()
+test_img_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),'LR/*')
+idx = 0
+for path in glob.glob(test_img_folder):
+    idx += 1
+    base = osp.splitext(osp.basename(path))[0]
+    print(idx, base)
+    # read images
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+    img = cv2.resize(img, (128,128))
+    img = img * 1.0 / 255
+    img = img[:, :, [2, 1, 0]]
+    img_LR = np.expand_dims(img,axis=0).astype(np.float32)
+    print("img_LR shape:",img_LR.shape)
+    t0 = time.time()
+    output = esrgan_model(img_LR)  #.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = np.clip(output[0], 0, 1)
+    output = np.transpose(output, (2, 0, 1))
+    use_time = round((time.time() - t0) * 1000, 2)
+    print(f"Inference_time:{use_time} ms")
+    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
+    output = (output * 255.0).round()
+    cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'{:s}_rlt_16qnn.png'.format(base)), output)
+print("ok")
+esrgan_model.interpreter.destory()