diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..65537aa24e3a4532458b7f13fde833a3c950096b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png filter=lfs diff=lfs merge=lfs -text +model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png filter=lfs diff=lfs merge=lfs -text diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..deefaf184e222d56a045f53fc0fa74709f72f4fd --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: INT8 +- Backend: QNN2.16 +- Target Device: FV01 QCS6490 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp +mkdir build && cd build +cmake .. 
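+# cmake configures the project (CMakeLists.txt expects OpenCV and the AidLite library to be available) and make builds the run_test binary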
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..54c6d91879d41f2f63d566935d580525e53ed9b2 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + const int 
src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..8decb11af15dd8a6dda346fbb82733b3de6392fc --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce910deb26ab4b1c9fb1c77e37b12b913473b18ac59c9ca0b45d65f212292d2 +size 18336944 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b42dab13ce7ea1acbf84d8a68c95a69de7978597 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + 
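+        # Create the AidLite Config: local execution, QNN backend, DSP acceleration; is_quantify_model = 1 marks the quantized context binary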
self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] 
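+# NHWC input of shape (1, 512, 1024, 3); keep H and W so the 64x128 output logits can be upsampled back to the input resolution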
+input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() + diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fa4aa2c5c8ef141951e2bd0ff49e3117e22e9fac --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: W8A16 +- Backend: QNN2.16 +- Target Device: FV01 QCS6490 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp +mkdir build && cd build +cmake .. 
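+# Configure with CMake and build run_test (requires OpenCV and the AidLite SDK to be installed)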
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98864a05827949b978ded878584179bcf01aff06 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + 
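+                // Offset of channel c in the CHW buffer; the four neighbouring pixels are blended with bilinear weights below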
const int src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec96acf48de9aaef2c70540355cea62d6590da50 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3065b3055672fb4f52f561a8ffb6ccb03e501480335f2f5f97d8cfaa6f0a4c +size 72810122 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth new file mode 100644 index 0000000000000000000000000000000000000000..608206844a0e46b19be5285c981dbd3ad1fd78c8 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597f83804cb9866c784b3d99209ee9e3b8b1f0b4f838c022a934ae5726f58218 +size 72423358 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..5d57eb060f4116abfc81cd9dc31da5e7859c04f7 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab3604cdedd3dc8ff34698bd15a197690df0511eae6e4856da89187fe7d17f1 +size 18537648 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f424b19daac188ea0fc2d01c69779b63e83ebe28 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/0000.jpg differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py 
b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py new file mode 100644 index 0000000000000000000000000000000000000000..302d9271c70a34b09f7e19638bdba331702306ec --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/config.py @@ -0,0 +1,11 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +imagenet_base_path = None +cityscapes_base_path = None +model_weights_base_path = None + +CITYSCAPES_MEAN = [0.485, 0.456, 0.406] +CITYSCAPES_STD = [0.229, 0.224, 0.225] +CITYSCAPES_NUM_CLASSES = 19 +CITYSCAPES_IGNORE_LABEL = 255 diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..d5d71ad2e78f139c1f1923b2fa683ffcb336833e --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + 
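+        # Benchmark loop: re-set the input and invoke the interpreter invoke_nums times, recording each latency in milliseconds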
invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] +input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() + diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py new file mode 100644 index 0000000000000000000000000000000000000000..996a019876ecf7f555cf69b554d79ba8412a8a4c --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/export_jit.py @@ -0,0 +1,44 @@ +import numpy as np +import torch +import os +import sys +from typing import Callable, Tuple +from models import resnet +from models.ffnet_blocks import create_ffnet +torch.set_grad_enabled(False) + + + +def segmentation_ffnet54S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_dBBB_mobile", + backbone=resnet.Resnet54S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth"), + strict_loading=True, + ) + + + + +ffnet54_dbbb = segmentation_ffnet54S_dBBB_mobile() + + + + +num_params = sum(p.numel() for p in ffnet54_dbbb.parameters() if p.requires_grad) +print(f'Number of palm_detector parameters: {num_params}') + + +ffnet_seg = ffnet54_dbbb + +seg_d_in = torch.randn(1, 3, 512, 1024,dtype= torch.float32) + + +source_model = torch.jit.trace(ffnet_seg,seg_d_in) +source_model.save("ffnet54S_dBBB_cityscapes_state_dict_quarts.pt") +print("export pose detect ok!") + diff --git 
a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6c19c7fe6d14325c574f2516229be6e2ed44c122 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +from .ffnet_S_mobile import * +from .ffnet_NS_mobile import * +from .ffnet_gpu_large import * +from .ffnet_S_gpu_large import * +from .ffnet_N_gpu_large import * +from .ffnet_gpu_small import * +from .ffnet_S_gpu_small import * diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3a2c11dd4f0ce92176c55dc137e87afa5866e4d Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fef455bbf751c8a1c3cd11a387cea3a3f64d9ab Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_NS_mobile.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f800e94f0d1f8afdf5edf4936e73c49dd3255d3 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_N_gpu_large.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..335bcecbfb1794e8bfccf46f3afe9e4498a792d5 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_large.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b5080f2b50bbf837c6163ccb844a0289f6b1e84 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_gpu_small.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41ea5f5eda0f99489b0d41d86df8ac7b49ea1704 Binary files /dev/null and 
b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_S_mobile.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72deade5c33775c901e7f79ee00de77a405d6df1 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_blocks.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3670e44f49a3d4bf088020f2fdc8c65a499ea7b Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_large.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74fdebcbb433f11b9cab28586f770ac759d34195 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/ffnet_gpu_small.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40260528fbcc60f828b8a4254e21285668cc0ead Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/model_registry.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51d52047f511bc812cdb1aa1cacc8240e19d48a8 Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/resnet.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cb683094e22fabeca5cf2ccb7a50a5572a8ac8c Binary files /dev/null and b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/__pycache__/utils.cpython-39.pyc differ diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..16d39c8d1043838c3e60b9c5ff67db2e1c72d88c --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_NS_mobile.py @@ -0,0 +1,318 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
+ +import os +from functools import partial + +import torch + + +from models.utils import model_weight_initializer +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 3-Stage Mobile FFNets trained for 1024x512 images, outputing segmentation maps of +##### 256x128 pixels. These models are intended for use with the +##### cityscapes evaluation script, which uses image sizes of 2048x1024 +########################################################################################## +@register_model +def segmentation_ffnet122NS_CBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122NS_CBB_mobile_pre_down", + backbone=resnet.Resnet122NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet74NS_CBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet74NS_CBB_mobile_pre_down", + backbone=resnet.Resnet74NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet46NS_CBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet46NS_CBB_mobile_pre_down", + backbone=resnet.Resnet46NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet122NS_CCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet122NS_CCC_mobile_pre_down", + backbone=resnet.Resnet122NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet74NS_CCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet74NS_CCC_mobile_pre_down", + backbone=resnet.Resnet74NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet46NS_CCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet46NS_CCC_mobile_pre_down", + 
backbone=resnet.Resnet46NS, + pre_downsampling=True, # Downsample the incoming image, before passing it to the network + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### The **actual** 3-Stage Mobile FFNets to export / use with 1024x512 images directly, +##### and output a segmentation map of 256x128 pixels +########################################################################################## +# +@register_model +def segmentation_ffnet122NS_CBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122NS_CBB_mobile", + backbone=resnet.Resnet122NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet74NS_CBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet74NS_CBB_mobile", + backbone=resnet.Resnet74NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet46NS_CBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet46NS_CBB_mobile", + backbone=resnet.Resnet46NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet122NS_CCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet122NS_CCC_mobile", + backbone=resnet.Resnet122NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet74NS_CCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet74NS_CCC_mobile", + backbone=resnet.Resnet74NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet46NS_CCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet46NS_CCC_mobile", + backbone=resnet.Resnet46NS, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + 
"ffnet46NS/ffnet46NS_CCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. +##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet122NS_CBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet122NS_CBX_mobile", + backbone=resnet.Resnet122NS, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet74NS_CBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet74NS_CBX_mobile", + backbone=resnet.Resnet74NS, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74NS/ffnet74NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet46NS_CBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet46NS_CBX_mobile", + backbone=resnet.Resnet46NS, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46NS/ffnet46NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +########################################################################################## +##### This is an example of how these FFNet models, which are intended for 1024x512 images +##### would be initialized for training on cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet122NS_CBB_mobile_pre_down_train(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122NS_CBB_mobile_pre_down", + backbone=resnet.Resnet122NS, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122NS/ffnet122NS_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, # Strict loading is false here because the weights are going into a model with pre_downsampling=True + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py new file mode 100644 index 0000000000000000000000000000000000000000..8883e4f3fe76c62332ef460df19bb1648ef37886 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_N_gpu_large.py @@ -0,0 +1,157 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
+ +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 3-Stage GPU FFNets. These are trained for use with image sizes of 2048x1024 and +##### output segmentation maps of size 512x256 pixels +########################################################################################## +@register_model +def segmentation_ffnet122N_CBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122N_CBB", + backbone=resnet.Resnet122N, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122N/ffnet122N_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet74N_CBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet74N_CBB", + backbone=resnet.Resnet74N, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74N/ffnet74N_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet46N_CBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet46N_CBB", + backbone=resnet.Resnet46N, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46N/ffnet46N_CBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. 
+##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet122N_CBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet122N_CBX", + backbone=resnet.Resnet122N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet74N_CBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet74N_CBX", + backbone=resnet.Resnet74N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet74N/ffnet74N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +@register_model +def classification_ffnet46N_CBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet46N_CBX", + backbone=resnet.Resnet46N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet46N/ffnet46N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=False, + strict_loading=True, + dropout_rate=0.2, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet122N_CBB_train(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet122N_CBB", + backbone=resnet.Resnet122N, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet122N/ffnet122N_CBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py new file mode 100644 index 0000000000000000000000000000000000000000..940b9b705e7f0a69f24e0249fc1e21bd8c6164ba --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_large.py @@ -0,0 +1,80 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with Slim backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 512x256 pixels +########################################################################################## +@register_model +def segmentation_ffnet150S_BBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_BBB", + backbone=resnet.Resnet150S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_BBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet86S_BBB_train(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py new file mode 100644 index 0000000000000000000000000000000000000000..12158368fae72b545d7b992d5e120c2019320a85 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_gpu_small.py @@ -0,0 +1,119 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with Slim backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 256x128 pixels +########################################################################################## +@register_model +def segmentation_ffnet150S_dBBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_dBBB", + backbone=resnet.Resnet150S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_dBBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_dBBB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_dBBB", + backbone=resnet.Resnet86S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_dBBB_gpu_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. +##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet150S_BBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet150S_BBX", + backbone=resnet.Resnet150S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet86S_BBX(): + return create_ffnet( + ffnet_head_type="B", + task="classification", + num_classes=1000, + model_name="ffnnet86S_BBX", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet86S_dBBB_train(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_dBBB", + backbone=resnet.Resnet86S_D, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_gpu_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..2f240c543ce2c98f549bb2b28cc8c07cf1673230 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_S_mobile.py @@ -0,0 +1,555 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
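+#
+# Module summary: this file registers the 4-stage Slim ("S") mobile FFNets
+# (ffnet40S/54S/78S/86S/150S): dBBB variants for full 2048x1024 frames, BBB/BCC variants
+# for 1024x512 inputs (with *_pre_down counterparts for the 2048x1024 Cityscapes
+# evaluation script), and BBX ImageNet classification initializations.
+#
+# Illustrative usage sketch: assumes the corresponding Cityscapes checkpoint exists under
+# model_weights_base_path, and that @register_model returns the decorated factory so it
+# can be imported directly.
+#
+#   import torch
+#   from models.ffnet_S_mobile import segmentation_ffnet54S_dBBB_mobile
+#
+#   model = segmentation_ffnet54S_dBBB_mobile()      # eval-mode nn.Module
+#   x = torch.randn(1, 3, 1024, 2048)                # NCHW batch, one 2048x1024 frame
+#   logits = model(x)                                # 19-class logits at 1/8 resolution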
+ +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage Mobile FFNets with Slim backbone. +##### These are trained for use with image sizes of 2048x1024, and output a segmentation map +##### of 256x128 pixels +########################################################################################## +@register_model +def segmentation_ffnet86S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_dBBB_mobile", + backbone=resnet.Resnet86S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet78S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet78S_dBBB_mobile", + backbone=resnet.Resnet78S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet54S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_dBBB_mobile", + backbone=resnet.Resnet54S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet40S_dBBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet40S_dBBB_mobile", + backbone=resnet.Resnet40S_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_dBBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### 4-Stage Mobile FFNets with Slim backbone, trained for use with image sizes of 1024x512 +##### and output a segmentation map of 256x128 pixels +##### These versions are meant for use with the cityscapes evaluation script, which provides +##### inputs at 2048x1024 +########################################################################################## +@register_model +def segmentation_ffnet150S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_BBB_mobile_pre_down", + backbone=resnet.Resnet150S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB_mobile_pre_down", + backbone=resnet.Resnet86S, + pre_downsampling=True, + 
pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet78S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet78S_BBB_mobile_pre_down", + backbone=resnet.Resnet78S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet54S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_BBB_mobile_pre_down", + backbone=resnet.Resnet54S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet40S_BBB_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet40S_BBB_mobile_pre_down", + backbone=resnet.Resnet40S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet150S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150S_BCC_mobile_pre_down", + backbone=resnet.Resnet150S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet86S_BCC_mobile_pre_down", + backbone=resnet.Resnet86S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet78S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet78S_BCC_mobile_pre_down", + backbone=resnet.Resnet78S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet54S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet54S_BCC_mobile_pre_down", + backbone=resnet.Resnet54S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet40S_BCC_mobile_pre_down(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet40S_BCC_mobile_pre_down", + backbone=resnet.Resnet40S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth", + ), + 
strict_loading=True, + ) + + +########################################################################################## +##### 4-Stage Mobile FFNets with Slim backbone. +##### These are the actual models, trained for use with image sizes of 1024x512 +##### and output a segmentation map of 256x128 pixels +##### See the versions with _pre_down suffix for models to use with the cityscapes evaluation script +########################################################################################## +@register_model +def segmentation_ffnet150S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150S_BBB_mobile", + backbone=resnet.Resnet150S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet86S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86S_BBB_mobile", + backbone=resnet.Resnet86S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet78S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet78S_BBB_mobile", + backbone=resnet.Resnet78S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet54S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet54S_BBB_mobile", + backbone=resnet.Resnet54S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet40S_BBB_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="segmentation_B", + num_classes=19, + model_name="ffnnet40S_BBB_mobile", + backbone=resnet.Resnet40S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BBB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet150S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150S_BCC_mobile", + backbone=resnet.Resnet150S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + 
+@register_model +def segmentation_ffnet86S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet86S_BCC_mobile", + backbone=resnet.Resnet86S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet78S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet78S_BCC_mobile", + backbone=resnet.Resnet78S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet54S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet54S_BCC_mobile", + backbone=resnet.Resnet54S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +@register_model +def segmentation_ffnet40S_BCC_mobile(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet40S_BCC_mobile", + backbone=resnet.Resnet40S, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BCC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=False, # Strict loading is false here because the weights come from a model with pre_downsampling=True + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. 
+##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet150S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet150S_BBX_mobile", + backbone=resnet.Resnet150S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150S/ffnet150S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet86S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet86S_BBX_mobile", + backbone=resnet.Resnet86S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86S/ffnet86S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet78S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet78S_BBX_mobile", + backbone=resnet.Resnet78S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet54S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet54S_BBX_mobile", + backbone=resnet.Resnet54S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet54S/ffnet54S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet40S_BBX_mobile(): + return create_ffnet( + ffnet_head_type="B_mobile", + task="classification", + num_classes=1000, + model_name="ffnnet40S_BBX_mobile", + backbone=resnet.Resnet40S, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet40S/ffnet40S_BBX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how the FFNet models intended for 1024x512 images +##### would be initialized for training on cityscapes with 2048x1024 images +##### Set up the rest accordingly +########################################################################################## +@register_model +def segmentation_ffnet78S_BCC_mobile_pre_down_train(): + return create_ffnet( + ffnet_head_type="C_mobile", + task="segmentation_C", + num_classes=19, + model_name="ffnnet78S_BCC_mobile_pre_down", + backbone=resnet.Resnet78S, + pre_downsampling=True, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet78S/ffnet78S_BBX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, # Strict loading is false here because the weights are going into a model with pre_downsampling=True + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..69074f5d4c540bae853926215dfe1ad0c7ae472d --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_blocks.py @@ -0,0 +1,663 @@ +# 
Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +######################################################################################### +# Part of the code in UpBranch adapted from https://github.com/feinanshan/FANet/blob/master/Testing/models/fanet/fanet.py +# +# The original source code was made available under the following license +# MIT License +# +# Copyright (c) 2021 Ping Hu +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +# Code for ClassificationHead adapted from https://github.com/HRNet/HRNet-Image-Classification + +# The original source code was made available under the following license +# MIT License +# Copyright (c) 2019 Microsoft Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +######################################################################################### + + +import math +import torch +from torch import nn +from torch.nn import functional as F +from models.utils import model_weight_initializer +import torchvision.transforms as T +from scipy import ndimage + +# The modules here currently assume that there are always 4 branches. 
+# It would need to be adapted in order to support a variable number of branches + +# TODO : Pass BN momentum through config +BN_MOMENTUM = 0.1 +gpu_up_kwargs = {"mode": "bilinear", "align_corners": True} +mobile_up_kwargs = {"mode": "nearest"} +relu_inplace = True + +# TODO : Replace functional interpolate operations with upsample modules + + +class ConvBNReLU(nn.Module): + def __init__( + self, + in_chan, + out_chan, + ks=3, + stride=1, + padding=1, + activation=nn.ReLU, + *args, + **kwargs, + ): + super(ConvBNReLU, self).__init__() + layers = [ + nn.Conv2d( + in_chan, + out_chan, + kernel_size=ks, + stride=stride, + padding=padding, + bias=False, + ), + nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM), + ] + if activation: + layers.append(activation(inplace=relu_inplace)) + self.layers = nn.Sequential(*layers) + + def forward(self, x): + return self.layers(x) + + +class AdapterConv(nn.Module): + def __init__( + self, in_channels=[256, 512, 1024, 2048], out_channels=[64, 128, 256, 512] + ): + super(AdapterConv, self).__init__() + assert len(in_channels) == len( + out_channels + ), "Number of input and output branches should match" + self.adapter_conv = nn.ModuleList() + + for k in range(len(in_channels)): + self.adapter_conv.append( + ConvBNReLU(in_channels[k], out_channels[k], ks=1, stride=1, padding=0), + ) + + def forward(self, x): + out = [] + for k in range(len(self.adapter_conv)): + out.append(self.adapter_conv[k](x[k])) + return out + + +class UpsampleCat(nn.Module): + def __init__(self, upsample_kwargs=gpu_up_kwargs): + super(UpsampleCat, self).__init__() + self._up_kwargs = upsample_kwargs + + def forward(self, x): + """Upsample and concatenate feature maps.""" + assert isinstance(x, list) or isinstance(x, tuple) + # print(self._up_kwargs) + x0 = x[0] + _, _, H, W = x0.size() + for i in range(1, len(x)): + x0 = torch.cat([x0, F.interpolate(x[i], (H, W), **self._up_kwargs)], dim=1) + return x0 + + +class UpBranch(nn.Module): + def __init__( + self, + in_channels=[64, 128, 256, 512], + out_channels=[128, 128, 128, 128], + upsample_kwargs=gpu_up_kwargs, + ): + super(UpBranch, self).__init__() + + self._up_kwargs = upsample_kwargs + + self.fam_32_sm = ConvBNReLU( + in_channels[3], out_channels[3], ks=3, stride=1, padding=1 + ) + self.fam_32_up = ConvBNReLU( + in_channels[3], in_channels[2], ks=1, stride=1, padding=0 + ) + self.fam_16_sm = ConvBNReLU( + in_channels[2], out_channels[2], ks=3, stride=1, padding=1 + ) + self.fam_16_up = ConvBNReLU( + in_channels[2], in_channels[1], ks=1, stride=1, padding=0 + ) + self.fam_8_sm = ConvBNReLU( + in_channels[1], out_channels[1], ks=3, stride=1, padding=1 + ) + self.fam_8_up = ConvBNReLU( + in_channels[1], in_channels[0], ks=1, stride=1, padding=0 + ) + self.fam_4 = ConvBNReLU( + in_channels[0], out_channels[0], ks=3, stride=1, padding=1 + ) + + self.high_level_ch = sum(out_channels) + self.out_channels = out_channels + + def forward(self, x): + + feat4, feat8, feat16, feat32 = x + + smfeat_32 = self.fam_32_sm(feat32) + upfeat_32 = self.fam_32_up(feat32) + + _, _, H, W = feat16.size() + x = F.interpolate(upfeat_32, (H, W), **self._up_kwargs) + feat16 + smfeat_16 = self.fam_16_sm(x) + upfeat_16 = self.fam_16_up(x) + + _, _, H, W = feat8.size() + x = F.interpolate(upfeat_16, (H, W), **self._up_kwargs) + feat8 + smfeat_8 = self.fam_8_sm(x) + upfeat_8 = self.fam_8_up(x) + + _, _, H, W = feat4.size() + smfeat_4 = self.fam_4( + F.interpolate(upfeat_8, (H, W), **self._up_kwargs) + feat4 + ) + + return smfeat_4, smfeat_8, smfeat_16, smfeat_32 + + 
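+# FFNetUpHead (below) assembles the head as a single nn.Sequential pipeline:
+#   AdapterConv (optional 1x1 conv+BN+ReLU blocks that remap the four branch widths)
+#   -> UpBranch (top-down fusion yielding four multi-scale feature maps)
+#   -> UpsampleCat (segmentation only: upsample every scale to the largest and concatenate)
+#   -> task head (SegmentationHead_NoSigmoid_1x1/_3x3 for segmentation,
+#      ClassificationHead for classification).
+# The head_type prefix (A/B/C) selects the channel widths, and the "_mobile" suffix swaps
+# bilinear upsampling for nearest-neighbour upsampling.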
+class FFNetUpHead(nn.Module): + def __init__( + self, + in_chans, + use_adapter_conv=True, + head_type="B_mobile", + task="segmentation_A", + num_classes=19, + base_chans=[64, 128, 256, 512], + dropout_rate=None, # Only used for classification + *args, + **kwargs, + ): + super(FFNetUpHead, self).__init__() + layers = [] + # base_chans = [64, 128, 128, 128] + if head_type.startswith("A"): + base_chans = [64, 128, 256, 512] + elif head_type.startswith("B"): + base_chans = [64, 128, 128, 256] + elif head_type.startswith("C"): + base_chans = [128, 128, 128, 128] + + if use_adapter_conv: + layers.append(AdapterConv(in_chans, base_chans)) + in_chans = base_chans[:] + + if head_type == "A": + layers.append(UpBranch(in_chans)) + elif head_type == "A_mobile": + layers.append(UpBranch(in_chans, upsample_kwargs=mobile_up_kwargs)) + elif head_type == "B": + layers.append(UpBranch(in_chans, [96, 96, 64, 32])) + elif head_type == "B_mobile": + layers.append( + UpBranch(in_chans, [96, 96, 64, 32], upsample_kwargs=mobile_up_kwargs) + ) + elif head_type == "C": + layers.append(UpBranch(in_chans, [128, 16, 16, 16])) + elif head_type == "C_mobile": + layers.append( + UpBranch(in_chans, [128, 16, 16, 16], upsample_kwargs=mobile_up_kwargs) + ) + else: + raise ValueError(f"Unknown FFNetUpHead type {head_type}") + + self.num_features = layers[-1].high_level_ch + self.num_multi_scale_features = layers[-1].out_channels + + if task.startswith("segmentation"): + if "mobile" in head_type: + layers.append(UpsampleCat(mobile_up_kwargs)) + else: + layers.append(UpsampleCat(gpu_up_kwargs)) + + # Gets single scale input + if "_C" in task: + mid_feat = 128 + layers.append( + SegmentationHead_NoSigmoid_1x1( + self.num_features, + mid_feat, + num_outputs=num_classes, + ) + ) + elif "_B" in task: + mid_feat = 256 + layers.append( + SegmentationHead_NoSigmoid_3x3( + self.num_features, + mid_feat, + num_outputs=num_classes, + ) + ) + elif "_A" in task: + mid_feat = 512 + layers.append( + SegmentationHead_NoSigmoid_1x1( + self.num_features, + mid_feat, + num_outputs=num_classes, + ) + ) + else: + raise ValueError(f"Unknown Segmentation Head {task}") + + elif task == "classification": + # Gets multi scale input + layers.append( + ClassificationHead( + self.num_multi_scale_features, + [128, 256, 512, 1024], + num_outputs=num_classes, + dropout_rate=dropout_rate, + ) + ) + self.layers = nn.Sequential(*layers) + + def forward(self, x): + return self.layers(x) + + +class SimpleBottleneckBlock(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super(SimpleBottleneckBlock, self).__init__() + bn_mom = 0.1 + bn_eps = 1e-5 + + self.downsample = None + if stride != 1 or inplanes != planes * self.expansion: + self.downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * self.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom), + ) + + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=bn_mom) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes, momentum=bn_mom) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_mom) + self.relu = nn.ReLU(inplace=True) + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + 
out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ClassificationHead(nn.Module): + def __init__( + self, + pre_head_channels, + head_channels=[128, 256, 512, 1024], + num_outputs=1, + dropout_rate=None, + ): + super(ClassificationHead, self).__init__() + + self.dropout_rate = dropout_rate + bn_mom = 0.1 + bn_eps = 1e-5 + head_block_type = SimpleBottleneckBlock + head_expansion = 4 + + expansion_layers = [] + for i, pre_head_channel in enumerate(pre_head_channels): + expansion_layer = head_block_type( + pre_head_channel, + int(head_channels[i] / head_expansion), + ) + expansion_layers.append(expansion_layer) + self.expansion_layers = nn.ModuleList(expansion_layers) + + # downsampling modules + downsampling_layers = [] + for i in range(len(pre_head_channels) - 1): + input_channels = head_channels[i] + output_channels = head_channels[i + 1] + + downsampling_layer = nn.Sequential( + nn.Conv2d( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=3, + stride=2, + padding=1, + ), + nn.BatchNorm2d(output_channels, momentum=bn_mom), + nn.ReLU(), + ) + + downsampling_layers.append(downsampling_layer) + self.downsampling_layers = nn.ModuleList(downsampling_layers) + + self.final_layer = nn.Sequential( + nn.Conv2d( + in_channels=head_channels[-1], + out_channels=2048, + kernel_size=1, + stride=1, + padding=0, + ), + nn.BatchNorm2d(2048, momentum=bn_mom), + nn.ReLU(inplace=True), + ) + + self.adaptive_avg_pool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear( + 2048, + num_outputs, + ) + + def forward(self, x): + + next_x = self.expansion_layers[0](x[0]) + for i in range(len(self.downsampling_layers)): + next_x = self.expansion_layers[i + 1](x[i + 1]) + self.downsampling_layers[ + i + ](next_x) + x = next_x + + x = self.final_layer(x) + x = self.adaptive_avg_pool(x).squeeze() + + if self.dropout_rate: + x = torch.nn.functional.dropout( + x, p=self._model_config.dropout_rate, training=self.training + ) + + x = self.classifier(x) + return x + + +class SegmentationHead_NoSigmoid_3x3(nn.Module): + def __init__( + self, backbone_channels, mid_channels=256, kernel_size=3, num_outputs=1 + ): + super(SegmentationHead_NoSigmoid_3x3, self).__init__() + last_inp_channels = backbone_channels + self.last_layer = nn.Sequential( + nn.Conv2d( + in_channels=last_inp_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=1, + padding=kernel_size // 2, + ), + nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM), + nn.ReLU(inplace=relu_inplace), + nn.Conv2d( + in_channels=mid_channels, + out_channels=num_outputs, + kernel_size=kernel_size, + stride=1, + padding=kernel_size // 2, + ), + ) + + def forward(self, x): + x = self.last_layer(x) + return x + + +class SegmentationHead_NoSigmoid_1x1(nn.Module): + def __init__( + self, backbone_channels, mid_channels=512, kernel_size=3, num_outputs=1 + ): + super(SegmentationHead_NoSigmoid_1x1, self).__init__() + last_inp_channels = backbone_channels + self.last_layer = nn.Sequential( + nn.Conv2d( + in_channels=last_inp_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=1, + padding=kernel_size // 2, + ), + nn.BatchNorm2d(mid_channels, momentum=BN_MOMENTUM), + nn.ReLU(inplace=relu_inplace), + nn.Conv2d( + in_channels=mid_channels, + out_channels=num_outputs, + kernel_size=1, + stride=1, + padding=0, + ), + ) + + 
def forward(self, x): + x = self.last_layer(x) + return x + + +class GaussianConv2D(nn.Module): + """ + Gaussian smoothing + downsampling, applied independently per channel + THIS IS NOT MEANT FOR USE ON MOBILE. MIGHT BE HORRIBLY SLOW + """ + + def __init__(self, channels, kernel_size, sigma, stride=1): + super(GaussianConv2D, self).__init__() + assert isinstance( + kernel_size, int + ), "Specify kernel size as int. Both dimensions will get the same kernel size" + assert isinstance(sigma, float), "Specify sigma as float. Anisotropic gaussian" + + kernel = torch.zeros(kernel_size, kernel_size) + mean_loc = int((kernel_size - 1) / 2) # Because 0 indexed + kernel[mean_loc, mean_loc] = 1 + kernel = torch.from_numpy(ndimage.gaussian_filter(kernel.numpy(), sigma=sigma)) + + # Make a dwise conv out of the kernel + # Weights of shape out_channels, in_channels/groups, k, k + kernel = kernel.view(1, 1, kernel_size, kernel_size) + kernel = kernel.repeat(channels, 1, 1, 1) + + self.conv = F.conv2d + # Register the kernel buffer instead of as a parameter, so that the training doesn't + # happily update it + self.register_buffer("weight", kernel) + self.channels = channels + self.stride = stride + + def forward(self, input): + return self.conv( + input, weight=self.weight, groups=self.channels, stride=self.stride + ) + + +class FFNet(nn.Module): + def __init__( + self, + ffnet_head_type="A", + num_classes=19, + task="segmentation_A", + use_adapter_convs=True, + backbone=None, + pre_downsampling=False, + model_name="default", + dropout_rate=None, + **kwargs, + ): + super(FFNet, self).__init__() + self.backbone_model = backbone() + branch_chans = self.backbone_model.out_channels + self.use_adapter_convs = use_adapter_convs + self.ffnet_head_type = ffnet_head_type + self.task = task + self.head = FFNetUpHead( + branch_chans, + use_adapter_conv=use_adapter_convs, + head_type=ffnet_head_type, + num_classes=num_classes, + task=task, + dropout_rate=dropout_rate, + ) + self.model_name = model_name + # Pre-downsampling is used while training models that use 1024x512 image sizes rather than 2048x1024. 
+ self.pre_downsampling = pre_downsampling + if self.pre_downsampling: + self.smoothing = GaussianConv2D( + channels=3, kernel_size=5, sigma=0.7, stride=2 + ) + + def forward(self, x): + if self.pre_downsampling: + x = self.smooth_and_downsample_input(x) + x = self.backbone_model(x) + return self.head(x) + + def smooth_and_downsample_input(self, x): + x = F.pad(x, (0, 0, 1, 1), mode="reflect") + return self.smoothing(x) + + def init_model( + self, pretrained_path=None, strict_loading=True, backbone_only=False + ): + print(f"Initializing {self.model_name} weights") + self.apply(model_weight_initializer) + if pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + if backbone_only: + backbone_dict = {} + for k, v in pretrained_dict.items(): + if k.startswith("backbone_model"): + backbone_dict[k] = v + self.load_state_dict(backbone_dict, strict=strict_loading) + else: + self.load_state_dict(pretrained_dict, strict=strict_loading) + else: + self.backbone_model.load_weights() + + +def create_ffnet( + pretrained=True, + imagenet_backbone_pretrained=True, + pretrained_weights_path=None, + pretrained_backbone_only=False, + ffnet_head_type="A", + strict_loading=True, + num_classes=19, + task="segmentation_A", + model_name="ffnnet122NS_CCC", + backbone=None, + pre_downsampling=False, + dropout_rate=None, + **kwargs, +): + + if pretrained_weights_path: + model_wghts = pretrained_weights_path + pretrained = True + if imagenet_backbone_pretrained: + pretrained = True + + model = FFNet( + ffnet_head_type=ffnet_head_type, + num_classes=num_classes, + task=task, + use_adapter_convs=True, + backbone=backbone, + pre_downsampling=pre_downsampling, + model_name=model_name, + dropout_rate=dropout_rate, + ) + + model.apply(model_weight_initializer) + if pretrained: + if pretrained_weights_path: + print("Loading pretrained model state dict from {}".format(model_wghts)) + model.init_model( + model_wghts, + strict_loading=strict_loading, + backbone_only=pretrained_backbone_only, + ) + else: + print( + "No model weights provided, attempting to load imagenet pretrained backbone..." + ) + model.init_model() + + model.eval() + return model diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py new file mode 100644 index 0000000000000000000000000000000000000000..bbb492d8b48b91b150acf39d122d82cfaf3df9d6 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_large.py @@ -0,0 +1,235 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with ResNet backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 512x256 pixels +########################################################################################## +@register_model +def segmentation_ffnet150_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet150_AAA", + backbone=resnet.Resnet150, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet134_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet134_AAA", + backbone=resnet.Resnet134, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet134/ffnet134_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet101_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet101_AAA", + backbone=resnet.Resnet101, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet101/ffnet101_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet86_AAA", + backbone=resnet.Resnet86, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet56_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet56_AAA", + backbone=resnet.Resnet56, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet50_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet50_AAA", + backbone=resnet.Resnet50, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet50/ffnet50_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_AAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet34_AAA", + backbone=resnet.Resnet34, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_AAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet150_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet150_ABB", + backbone=resnet.Resnet150, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet86_ABB", + backbone=resnet.Resnet86, + pre_downsampling=False, + 
pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet56_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet56_ABB", + backbone=resnet.Resnet56, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_ABB(): + return create_ffnet( + ffnet_head_type="B", + task="segmentation_B", + num_classes=19, + model_name="ffnnet34_ABB", + backbone=resnet.Resnet34, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_ABB_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet150_AAA_train(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet150_AAA", + backbone=resnet.Resnet150, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py new file mode 100644 index 0000000000000000000000000000000000000000..d26f776e4ece1d56369748b58e1dca5d6132ae6f --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/ffnet_gpu_small.py @@ -0,0 +1,385 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import os +from functools import partial + +import torch + + +from models import resnet + +import os +import sys +import numpy as np + +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +from models.ffnet_blocks import create_ffnet +from models.model_registry import register_model +from config import model_weights_base_path + + +########################################################################################## +##### 4-Stage GPU FFNets with ResNet backbone. 
+##### These are trained for use with image sizes of 2048x1024 +##### and output a segmentation map of 256x128 pixels +########################################################################################## +@register_model +def segmentation_ffnet150_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet150_dAAA", + backbone=resnet.Resnet150_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet134_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet134_dAAA", + backbone=resnet.Resnet134_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet134/ffnet134_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet101_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet101_dAAA", + backbone=resnet.Resnet101_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet101/ffnet101_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet86_dAAA", + backbone=resnet.Resnet86_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet56_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet56_dAAA", + backbone=resnet.Resnet56_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet50_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet50_dAAA", + backbone=resnet.Resnet50_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet50/ffnet50_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet34_dAAA", + backbone=resnet.Resnet34_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet18_dAAA(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_A", + num_classes=19, + model_name="ffnnet18_dAAA", + backbone=resnet.Resnet18_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet18/ffnet18_dAAA_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet150_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150_dAAC", + backbone=resnet.Resnet150_D, + 
pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet86_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet86_dAAC", + backbone=resnet.Resnet86_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet34_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet34_dAAC", + backbone=resnet.Resnet34_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def segmentation_ffnet18_dAAC(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet18_dAAC", + backbone=resnet.Resnet18_D, + pre_downsampling=False, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet18/ffnet18_dAAC_cityscapes_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### Classification models with an FFNet structure. Primarily intended for imagenet +##### initialization of FFNet. +##### See the README for the hyperparameters for training the classification models +########################################################################################## +@register_model +def classification_ffnet150_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet150_AAX", + backbone=resnet.Resnet150, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet134_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet134_AAX", + backbone=resnet.Resnet134, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet134/ffnet134_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet101_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet101_AAX", + backbone=resnet.Resnet101, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet101/ffnet101_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet86_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet86_AAX", + backbone=resnet.Resnet86, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet86/ffnet86_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet56_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet56_AAX", + backbone=resnet.Resnet56, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet56/ffnet56_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + 
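+# Illustrative usage sketch (relies only on models/model_registry.py from this
+# repository): each @register_model factory in this file is registered under its
+# function name and can be built with no arguments, e.g.
+#
+#     from models.model_registry import model_entrypoint
+#     model = model_entrypoint("classification_ffnet56_AAX")()
+#
+# model_entrypoint() returns the factory itself, so calling it runs create_ffnet
+# exactly as written in the definitions above.
+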
+@register_model +def classification_ffnet50_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet50_AAX", + backbone=resnet.Resnet50, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet50/ffnet50_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet34_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet34_AAX", + backbone=resnet.Resnet34, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet34/ffnet34_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +@register_model +def classification_ffnet18_AAX(): + return create_ffnet( + ffnet_head_type="A", + task="classification", + num_classes=1000, + model_name="ffnnet18_AAX", + backbone=resnet.Resnet18, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet18/ffnet18_AAX_imagenet_state_dict_quarts.pth", + ), + strict_loading=True, + ) + + +########################################################################################## +##### This is an example of how these FFNet models would be initialized for training on +##### cityscapes with 2048x1024 images +########################################################################################## +@register_model +def segmentation_ffnet150_dAAC_train(): + return create_ffnet( + ffnet_head_type="A", + task="segmentation_C", + num_classes=19, + model_name="ffnnet150_dAAC", + backbone=resnet.Resnet150_D, + pretrained_weights_path=os.path.join( + model_weights_base_path, + "ffnet150/ffnet150_AAX_imagenet_state_dict_quarts.pth", + ), + pretrained_backbone_only=True, # Set when initializing with *FFNet* ImageNet weights to ensure that the head is initialized from scratch + strict_loading=False, + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py new file mode 100644 index 0000000000000000000000000000000000000000..075db6e7706c995ef14c515e6fd2071259933213 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/model_registry.py @@ -0,0 +1,32 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. + +import sys + +_model_entrypoints = {} + + +def register_model(fn): + # lookup containing module + mod = sys.modules[fn.__module__] + # add model to __all__ in module + model_name = fn.__name__ + if hasattr(mod, "__all__"): + mod.__all__.append(model_name) + else: + mod.__all__ = [model_name] + + # add entries to registry dict/sets + _model_entrypoints[model_name] = fn + return fn + + +def model_entrypoint(model_name): + """Fetch a model entrypoint for specified model name""" + if model_name in _model_entrypoints: + return _model_entrypoints[model_name] + else: + raise RuntimeError( + f"Unknown model ({model_name}); known models are: " + f"{_model_entrypoints.keys()}" + ) diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..0e9cb8aee66ba476462092baf7d80e9564dc6ea3 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/resnet.py @@ -0,0 +1,593 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
+ +######################################################################### +# Code adapted from https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py + +# The original source code was made available under the following license +# BSD 3-Clause License +# +# Copyright (c) Soumith Chintala 2016, +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +######################################################################### +#### **The main takeaway is that simple FFNets made out of resnet backbones made using basic-block +#### **are just as competitive as complex architectures such as HRNet, DDRNet, FANet etc. + +#### New and old ResNet backbones, designed for use with FFNet. These do not have a classification +#### head attached here. ImageNet training of these backbones is done as an FFNet with a classification +#### head attached. See ffnet.py and ffnet_blocks.py. +#### Also, these models do not make a distinction between GPU and mobile because the elements that we change +#### between the two are among the additional modules that FFNet adds. +######################################################################### +import torch + +#### These are weights for the backbone when trained directly with a classification head attached at the end of the +#### backbone, and not as part of the FFNet structure. 
For a minor training accuracy advantage, one could use these +#### weights as the initialization for the relevant models in the new family of models, +#### but training from scratch works nearly equally well +model_paths = { + "resnet18": "/pretrained_weights/resnet18.pth", + "resnet34": "/pretrained_weights/resnet34.pth", + "resnet50": "/pretrained_weights/resnet50.pth", + "resnet101": "/pretrained_weights/resnet101.pth", +} + +import torch.nn as nn +import torch._utils + + +BN_MOMENTUM = 0.1 +relu_inplace = True + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_chan, out_chan, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_chan, out_chan, stride) + self.bn1 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM) + self.conv2 = conv3x3(out_chan, out_chan) + self.bn2 = nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=relu_inplace) + self.downsample = None + if in_chan != out_chan or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_chan, momentum=BN_MOMENTUM), + ) + + def forward(self, x): + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out_ = shortcut + out + out_ = self.relu(out_) + return out_ + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_chan, out_chan, stride=1, base_width=64): + super(Bottleneck, self).__init__() + width = int(out_chan * (base_width / 64.0)) * 1 + self.conv1 = conv1x1(in_chan, width) + self.bn1 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM) + self.conv2 = conv3x3(width, width, stride) + self.bn2 = nn.BatchNorm2d(width, momentum=BN_MOMENTUM) + self.conv3 = conv1x1(width, out_chan * self.expansion) + self.bn3 = nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=relu_inplace) + self.downsample = None + if in_chan != out_chan * self.expansion or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d( + in_chan, + out_chan * self.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(out_chan * self.expansion, momentum=BN_MOMENTUM), + ) + + def forward(self, x): + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out_ = shortcut + out + out_ = self.relu(out_) + + return out_ + + +########################################################################################## +##### Vanilla ResNets, but with a more filled out model space, and primarily using basic blocks +########################################################################################## + + +class ResNet(nn.Module): + def __init__( + self, + block, + layers, + strides, + pretrained_path=None, + branch_chans=[64, 128, 256, 512], + ): + super(ResNet, self).__init__() + self.pretrained_path = pretrained_path + 
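+        # Stem: a 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool,
+        # giving layer1 a 1/4-resolution input.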
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=relu_inplace) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.inplanes = 64 + self.layer1 = self._make_layer( + block, branch_chans[0], bnum=layers[0], stride=strides[0] + ) + self.layer2 = self._make_layer( + block, branch_chans[1], bnum=layers[1], stride=strides[1] + ) + self.layer3 = self._make_layer( + block, branch_chans[2], bnum=layers[2], stride=strides[2] + ) + self.layer4 = self._make_layer( + block, branch_chans[3], bnum=layers[3], stride=strides[3] + ) + self.out_channels = [x * block.expansion for x in branch_chans] + + def _make_layer(self, block, out_chan, bnum, stride=1): + layers = [block(self.inplanes, out_chan, stride=stride)] + self.inplanes = out_chan * block.expansion + for i in range(bnum - 1): + layers.append(block(self.inplanes, out_chan, stride=1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(self.bn1(x)) + x = self.maxpool(x) + + feat4 = self.layer1(x) + feat8 = self.layer2(feat4) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat4, feat8, feat16, feat32 + + def load_weights(self, pretrained_path=None): + if not pretrained_path: + pretrained_path = self.pretrained_path + if self.pretrained_path or pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + print(f"Loading backbone weights from {pretrained_path} with strict=False") + print(f"Caution!! Things could silently fail here") + self.load_state_dict(pretrained_dict, strict=False) + else: + print("No backbone weights loaded") + + +########################################################################################## +##### Vanilla ResNet instantiations +##### The versions marked with _D are not trained on ImageNet, and use the weights from +##### the respective models without a _D in the name +########################################################################################## + + +def Resnet18_D(**kwargs): + model = ResNet(BasicBlock, [2, 2, 2, 2], [2, 2, 2, 2]) # , model_paths["resnet18"]) + return model + + +def Resnet18(**kwargs): + model = ResNet(BasicBlock, [2, 2, 2, 2], [1, 2, 2, 2]) # , model_paths["resnet18"]) + return model + + +def Resnet34_D(**kwargs): + model = ResNet(BasicBlock, [3, 4, 6, 3], [2, 2, 2, 2]) # , model_paths["resnet34"]) + return model + + +def Resnet34(**kwargs): + model = ResNet(BasicBlock, [3, 4, 6, 3], [1, 2, 2, 2]) # , model_paths["resnet34"]) + return model + + +def Resnet50_D(**kwargs): + model = ResNet(Bottleneck, [3, 4, 6, 3], [2, 2, 2, 2]) # , model_paths["resnet50"]) + return model + + +def Resnet50(**kwargs): + model = ResNet(Bottleneck, [3, 4, 6, 3], [1, 2, 2, 2]) # , model_paths["resnet50"]) + return model + + +# can use model_paths["resnet34"] to initialize the weights here, for instance +def Resnet56_D(**kwargs): + model = ResNet(BasicBlock, [4, 8, 12, 3], [2, 2, 2, 2]) + return model + + +def Resnet56(**kwargs): + model = ResNet(BasicBlock, [4, 8, 12, 3], [1, 2, 2, 2]) + return model + + +def Resnet86_D(**kwargs): + model = ResNet(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2]) + return model + + +def Resnet86(**kwargs): + model = ResNet(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2]) + return model + + +def Resnet101_D(**kwargs): + model = ResNet( + Bottleneck, [3, 4, 23, 3], [2, 2, 2, 2] + ) # , model_paths["resnet101"]) + return model + + +def 
Resnet101(**kwargs): + model = ResNet( + Bottleneck, [3, 4, 23, 3], [1, 2, 2, 2] + ) # , model_paths["resnet101"]) + return model + + +def Resnet134_D(**kwargs): + model = ResNet(BasicBlock, [8, 18, 28, 12], [2, 2, 2, 2]) + return model + + +def Resnet134(**kwargs): + model = ResNet(BasicBlock, [8, 18, 28, 12], [1, 2, 2, 2]) + return model + + +def Resnet150_D(**kwargs): + model = ResNet(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2]) + return model + + +def Resnet150(**kwargs): + model = ResNet(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2]) + return model + + +########################################################################################## +##### Slim ResNets. Narrower, with a deeper stem +########################################################################################## + + +class ResNetS(nn.Module): + def __init__( + self, + block, + layers, + strides, + pretrained_path=None, + branch_chans=[64, 128, 192, 320], + ): + super(ResNetS, self).__init__() + self.pretrained_path = pretrained_path + self.conv0 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False) + self.bn0 = nn.BatchNorm2d(32, momentum=BN_MOMENTUM) + self.relu0 = nn.ReLU(inplace=relu_inplace) + self.conv1 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU(inplace=relu_inplace) + self.inplanes = 64 + self.layer1 = self._make_layer( + block, branch_chans[0], bnum=layers[0], stride=strides[0] + ) + self.layer2 = self._make_layer( + block, branch_chans[1], bnum=layers[1], stride=strides[1] + ) + self.layer3 = self._make_layer( + block, branch_chans[2], bnum=layers[2], stride=strides[2] + ) + self.layer4 = self._make_layer( + block, branch_chans[3], bnum=layers[3], stride=strides[3] + ) + self.out_channels = [x * block.expansion for x in branch_chans] + + def _make_layer(self, block, out_chan, bnum, stride=1): + layers = [block(self.inplanes, out_chan, stride=stride)] + self.inplanes = out_chan * block.expansion + for i in range(bnum - 1): + layers.append(block(self.inplanes, out_chan, stride=1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv0(x) + x = self.relu0(self.bn0(x)) + x = self.relu1(self.bn1(self.conv1(x))) + + feat4 = self.layer1(x) + feat8 = self.layer2(feat4) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat4, feat8, feat16, feat32 + + def load_weights(self, pretrained_path=None): + if not pretrained_path: + pretrained_path = self.pretrained_path + if self.pretrained_path or pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + print(f"Loading backbone weights from {pretrained_path} with strict=False") + print(f"Caution!! 
Things could silently fail here") + self.load_state_dict(pretrained_dict, strict=False) + else: + print("No backbone weights loaded") + + +########################################################################################## +##### Slim ResNet Instantiations +##### The versions marked with _D are not trained on ImageNet, and use the weights from +##### the respective models without a _D in the name +########################################################################################## + + +def Resnet22S_D(**kwargs): + model = ResNetS(BasicBlock, [2, 3, 3, 2], [2, 2, 2, 2]) + return model + + +def Resnet22S(**kwargs): + model = ResNetS(BasicBlock, [2, 3, 3, 2], [1, 2, 2, 2]) + return model + + +def Resnet30S_D(**kwargs): + model = ResNetS(BasicBlock, [3, 4, 4, 3], [2, 2, 2, 2]) + return model + + +def Resnet30S(**kwargs): + model = ResNetS(BasicBlock, [3, 4, 4, 3], [1, 2, 2, 2]) + return model + + +def Resnet40S_D(**kwargs): + model = ResNetS(BasicBlock, [4, 5, 6, 4], [2, 2, 2, 2]) + return model + + +def Resnet40S(**kwargs): + model = ResNetS(BasicBlock, [4, 5, 6, 4], [1, 2, 2, 2]) + return model + + +def Resnet54S_D(**kwargs): + model = ResNetS(BasicBlock, [5, 8, 8, 5], [2, 2, 2, 2]) + return model + + +def Resnet54S(**kwargs): + model = ResNetS(BasicBlock, [5, 8, 8, 5], [1, 2, 2, 2]) + return model + + +def Resnet78S_D(**kwargs): + model = ResNetS(BasicBlock, [6, 12, 12, 8], [2, 2, 2, 2]) + return model + + +def Resnet78S(**kwargs): + model = ResNetS(BasicBlock, [6, 12, 12, 8], [1, 2, 2, 2]) + return model + + +def Resnet86S_D(**kwargs): + model = ResNetS(BasicBlock, [8, 12, 16, 6], [2, 2, 2, 2]) + return model + + +def Resnet86S(**kwargs): + model = ResNetS(BasicBlock, [8, 12, 16, 6], [1, 2, 2, 2]) + return model + + +def Resnet150S_D(**kwargs): + model = ResNetS(BasicBlock, [16, 18, 28, 12], [2, 2, 2, 2]) + return model + + +def Resnet150S(**kwargs): + model = ResNetS(BasicBlock, [16, 18, 28, 12], [1, 2, 2, 2]) + return model + + +########################################################################################## +##### 3 Stage ResNets +########################################################################################## + + +class ResNetNarrow(nn.Module): + def __init__( + self, + block, + layers, + strides, + pretrained_path=None, + branch_chans=[64, 96, 160, 320], + ): + super(ResNetNarrow, self).__init__() + self.pretrained_path = pretrained_path + # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn0 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu0 = nn.ReLU(inplace=relu_inplace) + self.conv1 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU(inplace=relu_inplace) + self.conv2 = nn.Conv2d( + 64, branch_chans[0], kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(branch_chans[0], momentum=BN_MOMENTUM) + self.relu2 = nn.ReLU(inplace=relu_inplace) + self.inplanes = branch_chans[0] + self.layer1 = self._make_layer( + block, branch_chans[1], bnum=layers[0], stride=strides[0] + ) + self.layer2 = self._make_layer( + block, branch_chans[2], bnum=layers[1], stride=strides[1] + ) + self.layer3 = self._make_layer( + block, branch_chans[3], bnum=layers[2], stride=strides[2] + ) + # Always load weights, and re-init from scratch if pre-trained is not specified. 
A little costly, but less messy + # self.apply(seg_model_weight_initializer) #For layers not present in the snapshot ?? + # self.load_weights(pretrained_path) + # branch_chans = [64, 96, 160, 320] + self.out_channels = [x * block.expansion for x in branch_chans] + + def _make_layer(self, block, out_chan, bnum, stride=1): + layers = [block(self.inplanes, out_chan, stride=stride)] + self.inplanes = out_chan * block.expansion + for i in range(bnum - 1): + layers.append(block(self.inplanes, out_chan, stride=1)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv0(x) + x = self.relu0(self.bn0(x)) + x = self.relu1(self.bn1(self.conv1(x))) + feat4 = self.relu2(self.bn2(self.conv2(x))) + + feat8 = self.layer1(feat4) # 1/8 + feat16 = self.layer2(feat8) # 1/16 + feat32 = self.layer3(feat16) # 1/32 + return feat4, feat8, feat16, feat32 + + def load_weights(self, pretrained_path=None): + if not pretrained_path: + pretrained_path = self.pretrained_path + if self.pretrained_path or pretrained_path: + pretrained_dict = torch.load( + pretrained_path, map_location={"cuda:0": "cpu"} + ) + print(f"Loading backbone weights from {pretrained_path} with strict=False") + print(f"Caution!! Things could silently fail here") + self.load_state_dict(pretrained_dict, strict=False) + else: + print("No backbone weights loaded") + + +########################################################################################## +##### 3 Stage ResNet Instantiations +##### These backbones do not differ between imagenet and cityscapes +########################################################################################## + + +def Resnet122N(**kwargs): + model = ResNetNarrow( + BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 96, 160, 320] + ) + return model + + +def Resnet74N(**kwargs): + model = ResNetNarrow( + BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 96, 160, 320] + ) + return model + + +def Resnet46N(**kwargs): + model = ResNetNarrow( + BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 96, 160, 320] + ) + return model + + +def Resnet122NS(**kwargs): + model = ResNetNarrow( + BasicBlock, [16, 24, 20], [2, 2, 2], branch_chans=[64, 64, 128, 256] + ) + return model + + +def Resnet74NS(**kwargs): + model = ResNetNarrow( + BasicBlock, [8, 12, 16], [2, 2, 2], branch_chans=[64, 64, 128, 256] + ) + return model + + +def Resnet46NS(**kwargs): + model = ResNetNarrow( + BasicBlock, [6, 8, 8], [2, 2, 2], branch_chans=[64, 64, 128, 256] + ) + return model diff --git a/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fa22c6702660d8bee9835bd3a91cb472513e0ec2 --- /dev/null +++ b/model_farm_ffnet54s_qcs6490_qnn2.16_w8a16_aidlite/python/models/utils.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022 Qualcomm Technologies, Inc. +# All Rights Reserved. 
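+# Weight-initialization helpers: dense_kernel_initializer (for nn.Linear) and
+# model_weight_initializer, which ffnet_blocks.create_ffnet applies to the whole
+# model before any pretrained weights are loaded.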
+ +import torch +from torch import nn +from torch.nn import init +import numpy as np + + +def dense_kernel_initializer(tensor): + _, fan_out = nn.init._calculate_fan_in_and_fan_out(tensor) + init_range = 1.0 / np.sqrt(fan_out) + + return nn.init.uniform_(tensor, a=-init_range, b=init_range) + + +def model_weight_initializer(m): + """ + Usage: + model = Model() + model.apply(weight_init) + """ + if isinstance(m, nn.Conv2d): + # Yes, this non-fancy init is on purpose, + # and seems to work better in practice for segmentation + if hasattr(m, "weight"): + nn.init.normal_(m.weight, std=0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.Linear): + dense_kernel_initializer(m.weight.data) + if m.bias is not None: + nn.init.zeros_(m.bias.data) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9da5d40966672c087cb91011395f0e6d4a46976a --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: INT8 +- Backend: QNN2.16 +- Target Device: SNM972 QCS8550 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp +mkdir build && cd build +cmake .. 
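+# cmake generates the build files; make then produces the run_test binary,
+# which links against the aidlite and OpenCV libraries (see CMakeLists.txt)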
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6406baac6c714f9aa87196ad9f7ef85783a87a64 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + const int 
src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b95ee0bb595dc5b18cd5d30a2f9a2563db8aa52 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e67a07dc0454b16d3363f2b0c92dcc87a10e3dc895fc1571b33bc2df53e3a81 +size 36449096 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..27172051a516354af0da0271da4b8f4d2dde4de5 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_fp16_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_fp16.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + 
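+        # Runtime configuration: execute the QNN context binary locally with the
+        # DSP (HTP) accelerator selected below.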
self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] 
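+# input is now NHWC float32 with shape (1, 512, 1024, 3), matching the
+# input_shapes passed to set_model_properties above; the raw (1, 64, 128, 19)
+# output is transposed to NCHW below, upsampled back to the input size, and
+# reduced to per-pixel class ids via softmax and argmax.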
+input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() + diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/README.md b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b591af7035053aa002c834fcc9ca037bfa714c4 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/README.md @@ -0,0 +1,55 @@ +## Model Information +### Source model +- Input shape: 1x3x512x1024 +- Number of parameters:18.04M +- Model size:69.4MB, +- Output shape: 1x19x64x128 + +Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master) + +### Converted model + +- Precision: INT8 +- Backend: QNN2.16 +- Target Device: SNM972 QCS8550 + +## Inference with AidLite SDK + +### SDK installation +Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/) + +- install AidLite SDK + +```bash +# Install the appropriate version of the aidlite sdk +sudo aid-pkg update +sudo aid-pkg install aidlite-sdk +# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223 +sudo aid-pkg install aidlite-{QNN VERSION} +``` + +- Verify AidLite SDK + +```bash +# aidlite sdk c++ check +python3 -c "import aidlite ; print(aidlite.get_library_version())" + +# aidlite sdk python check +python3 -c "import aidlite ; print(aidlite.get_py_library_version())" +``` + +### Run demo +#### python +```bash +cd python +python3 demo_qnn.py +``` + +#### c++ +```bash +cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp +mkdir build && cd build +cmake .. 
+make +./run_test +``` \ No newline at end of file diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..54c6d91879d41f2f63d566935d580525e53ed9b2 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < 
channels; ++c) { + const int src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..879e8d7d6b4c74e71402381d8a9e6a95613d1745 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42a1d0fa4cf46db4a4c6559188665f2cc8c1a5f1709fffbebd20df55f8c0a94 +size 18291888 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b42dab13ce7ea1acbf84d8a68c95a69de7978597 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a8.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") + return + + 
self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] 
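+# At this point `input` is a 1x512x1024x3 float32 NHWC tensor: the image was
+# converted BGR -> RGB, resized to the model's 1024x512 resolution, normalised
+# with the mean/std values above, and given a batch dimension so it matches the
+# input_shapes declared in ffnet54sQnn.__init__.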
+input_size = input.shape[1], input.shape[2]  # (H, W)
+t0 = time.time()
+out = ffnet_segm(input)
+use_time = round((time.time() - t0) * 1000, 2)
+print(f"segmentation inference time: {use_time} ms")
+out = np.transpose(out, (0, 3, 1, 2))
+out = torch.from_numpy(out)
+
+output = torch.nn.functional.interpolate(
+    out, size=input_size, mode="bilinear", align_corners=False
+)
+output_data = torch.nn.functional.softmax(output, dim=1).data
+max_probs, predictions = output_data.max(1)
+
+
+prediction = predictions.numpy().astype(np.uint8)
+test = decode_segmap(prediction[0])
+
+cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)), '%04d.jpg' % frame_ct), test[:, :, ::-1])
+ffnet_segm.interpreter.destory()
+
diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/README.md b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3ab54a7d143393e53d0a38fb84fd0659c4167b10
--- /dev/null
+++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/README.md
@@ -0,0 +1,55 @@
+## Model Information
+### Source model
+- Input shape: 1x3x512x1024
+- Number of parameters: 18.04M
+- Model size: 69.4MB
+- Output shape: 1x19x64x128
+
+Source model repository: [ffnet54s](https://github.com/Qualcomm-AI-research/FFNet/tree/master)
+
+### Converted model
+
+- Precision: W8A16
+- Backend: QNN2.16
+- Target Device: SNM972 QCS8550
+
+## Inference with AidLite SDK
+
+### SDK installation
+Model Farm uses the AidLite SDK for model inference. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+- Install AidLite SDK
+
+```bash
+# Install the appropriate version of the AidLite SDK
+sudo aid-pkg update
+sudo aid-pkg install aidlite-sdk
+# Install the QNN variant that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+sudo aid-pkg install aidlite-{QNN VERSION}
+```
+
+- Verify AidLite SDK
+
+```bash
+# AidLite SDK C++ library check
+python3 -c "import aidlite ; print(aidlite.get_library_version())"
+
+# AidLite SDK Python library check
+python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+```
+
+### Run demo
+#### python
+```bash
+cd python
+python3 demo_qnn.py
+```
+
+#### c++
+```bash
+cd ffnet54s/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp
+mkdir build && cd build
+cmake ..
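+# After the build, run_test also accepts optional flags parsed in run_test.cpp
+# (--target_model, --imgs, --invoke_nums, --model_type), for example:
+# ./run_test --imgs ../2.png --invoke_nums 10 --model_type QNN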
+make +./run_test +``` diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7465fea6f19609492849a263dbd1909ae185636d --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required (VERSION 3.5) +project("run_test") + +find_package(OpenCV REQUIRED) + +message(STATUS "oPENCV Library status:") +message(STATUS ">version:${OpenCV_VERSION}") +message(STATUS "Include:${OpenCV_INCLUDE_DIRS}") + +set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations") + +include_directories( + /usr/local/include + /usr/include/opencv4 +) + +link_directories( + /usr/local/lib/ +) + +file(GLOB SRC_LISTS + ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp +) + +add_executable(run_test ${SRC_LISTS}) + +target_link_libraries(run_test + aidlite + ${OpenCV_LIBS} + pthread +) diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98864a05827949b978ded878584179bcf01aff06 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/cpp/run_test.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include // 用于 memcpy +#include +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; +using namespace Aidlux::Aidlite; + +// 定義顏色表 (19個類別) +const std::array, 19> label_colors = {{ + {0, 0, 0}, // 0=background + {128, 0, 0}, // 1=aeroplane + {0, 128, 0}, // 2=bicycle + {128, 128, 0}, // 3=bird + {0, 0, 128}, // 4=boat + {128, 0, 128}, // 5=bottle + {0, 128, 128}, // 6=bus + {128, 128, 128}, // 7=car + {64, 0, 0}, // 8=cat + {192, 0, 0}, // 9=chair + {64, 128, 0}, // 10=cow + {192, 128, 0}, // 11=dining table + {64, 0, 128}, // 12=dog + {192, 0, 128}, // 13=horse + {64, 128, 128}, // 14=motorbike + {192, 128, 128}, // 15=person + {0, 64, 0}, // 16=potted plant + {128, 64, 0}, // 17=sheep + {0, 192, 0}, // 18=sofa +}}; + +// 雙線性插值 (輸入佈局: NCHW, n=1 簡化為 CHW) +std::vector bilinear_interpolate( + const float* input, int src_h, int src_w, int target_h, int target_w, int channels) { + + std::vector output(target_h * target_w * channels, 0.0f); + const float scale_h = static_cast(src_h) / target_h; + const float scale_w = static_cast(src_w) / target_w; + + for (int y = 0; y < target_h; ++y) { + const float y_src = (y + 0.5f) * scale_h - 0.5f; + const int y0 = std::max(0, std::min(static_cast(y_src), src_h - 1)); + const int y1 = std::max(0, std::min(y0 + 1, src_h - 1)); + const float dy = y_src - y0; + + for (int x = 0; x < target_w; ++x) { + const float x_src = (x + 0.5f) * scale_w - 0.5f; + const int x0 = std::max(0, std::min(static_cast(x_src), src_w - 1)); + const int x1 = std::max(0, std::min(x0 + 1, src_w - 1)); + const float dx = x_src - x0; + + for (int c = 0; c < channels; ++c) { + 
const int src_idx = c * src_h * src_w; + const float val00 = input[src_idx + y0 * src_w + x0]; + const float val01 = input[src_idx + y0 * src_w + x1]; + const float val10 = input[src_idx + y1 * src_w + x0]; + const float val11 = input[src_idx + y1 * src_w + x1]; + + const float val = (1 - dy) * (1 - dx) * val00 + + (1 - dy) * dx * val01 + + dy * (1 - dx) * val10 + + dy * dx * val11; + + output[c * target_h * target_w + y * target_w + x] = val; + } + } + } + return output; +} + +// Softmax 計算 (通道維度) +void softmax(float* data, int height, int width, int channels) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_val = -INFINITY; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + max_val = std::max(max_val, data[idx]); + } + + float sum_exp = 0.0f; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + sum_exp += std::exp(data[idx] - max_val); + } + + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + data[idx] = std::exp(data[idx] - max_val) / sum_exp; + } + } + } +} + +// 提取最大類別索引 +std::vector compute_predictions(const float* data, int height, int width, int channels) { + std::vector pred(height * width); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float max_prob = -INFINITY; + uint8_t max_idx = 0; + for (int c = 0; c < channels; ++c) { + const int idx = c * height * width + y * width + x; + if (data[idx] > max_prob) { + max_prob = data[idx]; + max_idx = c; + } + } + pred[y * width + x] = max_idx; + } + } + return pred; +} + +// 解碼為 RGB 圖像 +std::vector decode_segmap(const std::vector& pred, int height, int width) { + std::vector rgb(height * width * 3); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int idx = y * width + x; + const uint8_t label = pred[idx]; + if (label < 19) { + rgb[idx * 3] = label_colors[label][0]; + rgb[idx * 3 + 1] = label_colors[label][1]; + rgb[idx * 3 + 2] = label_colors[label][2]; + } else { + rgb[idx * 3] = rgb[idx * 3 + 1] = rgb[idx * 3 + 2] = 0; + } + } + } + return rgb; +} + +struct Args { + std::string target_model = "../../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin"; + std::string imgs = "../2.png"; + int invoke_nums = 10; + std::string model_type = "QNN"; +}; + +Args parse_args(int argc, char* argv[]) { + Args args; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--target_model" && i + 1 < argc) { + args.target_model = argv[++i]; + } else if (arg == "--imgs" && i + 1 < argc) { + args.imgs = argv[++i]; + } else if (arg == "--invoke_nums" && i + 1 < argc) { + args.invoke_nums = std::stoi(argv[++i]); + } else if (arg == "--model_type" && i + 1 < argc) { + args.model_type = argv[++i]; + } + } + return args; +} + +std::string to_lower(const std::string& str) { + std::string lower_str = str; + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return lower_str; +} + +int transpose(float* src, unsigned int* src_dims, unsigned int* tsp_dims, float* dest){ + + int current_coordinate[4] = {0, 0, 0, 0}; + for(int a = 0; a < src_dims[0]; ++a){ + current_coordinate[0] = a; + for(int b = 0; b < src_dims[1]; ++b){ + current_coordinate[1] = b; + for(int c = 0; c < src_dims[2]; ++c){ + current_coordinate[2] = c; + for(int d = 0; d < src_dims[3]; ++d){ + current_coordinate[3] = d; + + int old_index = 
current_coordinate[0]*src_dims[1]*src_dims[2]*src_dims[3] + + current_coordinate[1]*src_dims[2]*src_dims[3] + + current_coordinate[2]*src_dims[3] + + current_coordinate[3]; + + int new_index = current_coordinate[tsp_dims[0]]*src_dims[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[1]]*src_dims[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[2]]*src_dims[tsp_dims[3]] + + current_coordinate[tsp_dims[3]]; + + dest[new_index] = src[old_index]; + } + } + } + } + + return EXIT_SUCCESS; +} + +cv::Mat post_process(cv::Mat &frame, float* outdata) +{ + cv::Mat input_image = frame.clone(); + // Initialize vectors to hold respective outputs while unwrapping detections. + std::vector class_ids; + std::vector confidences; + std::vector boxes; + std::vector masks; + std::vector class_scores; + cv::RNG rng; + cv::Mat masked_img; + + unsigned int src_dims[4] = {1, 64,128,19}; + unsigned int tsp_dims[4] = {0,3,1,2}; + unsigned int stride_data_num = 1*64*128*19; + float* format_data = new float[stride_data_num]; + transpose(outdata, src_dims, tsp_dims, format_data); + cv::Mat proto_buffer(19,64*128, CV_32F, format_data); + std::cout << "proto_buffer 维度: " << proto_buffer.rows << "x" << proto_buffer.cols << std::endl; + + const int channels = 19; + int target_h = 64, target_w = 128; + int src_h = 64, src_w = 128; + // Step 1: 雙線性插值 + auto interpolated = bilinear_interpolate(format_data, src_h, src_w, target_h, target_w, channels); + + // Step 2: Softmax + softmax(interpolated.data(), target_h, target_w, channels); + + // Step 3: 獲取預測類別 + auto predictions = compute_predictions(interpolated.data(), target_h, target_w, channels); + + printf("Processing finished.\n"); + // Step 4: 解碼為 RGB + std::vector rgb_data = decode_segmap(predictions, target_h, target_w); + cv::Mat image(64, 128, CV_MAKETYPE(CV_8U, 3), const_cast(rgb_data.data())); + + // Step 2: 转换颜色通道 (RGB → BGR) + if (channels == 3) { + cv::cvtColor(image, image, cv::COLOR_RGB2BGR); + } + cv::Mat resized_cubic; + cv::resize(image, resized_cubic, cv::Size(1024,512), 0, 0, cv::INTER_CUBIC); + return resized_cubic; +} + + +int invoke(const Args& args) { + std::cout << "Start main ... ... 
Model Path: " << args.target_model << "\n" + << "Image Path: " << args.imgs << "\n" + << "Inference Nums: " << args.invoke_nums << "\n" + << "Model Type: " << args.model_type << "\n"; + Model* model = Model::create_instance(args.target_model); + if(model == nullptr){ + printf("Create model failed !\n"); + return EXIT_FAILURE; + } + Config* config = Config::create_instance(); + if(config == nullptr){ + printf("Create config failed !\n"); + return EXIT_FAILURE; + } + config->implement_type = ImplementType::TYPE_LOCAL; + std::string model_type_lower = to_lower(args.model_type); + if (model_type_lower == "qnn"){ + config->framework_type = FrameworkType::TYPE_QNN; + } else if (model_type_lower == "snpe2" || model_type_lower == "snpe") { + config->framework_type = FrameworkType::TYPE_SNPE2; + } + config->accelerate_type = AccelerateType::TYPE_DSP; + config->is_quantify_model = 1; + + unsigned int model_h = 512; + unsigned int model_w = 1024; + std::vector> input_shapes = {{1,3,model_h,model_w}}; + std::vector> output_shapes = {{1,64,128,19}}; + model->set_model_properties(input_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes, Aidlux::Aidlite::DataType::TYPE_FLOAT32); + std::unique_ptr fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config); + if(fast_interpreter == nullptr){ + printf("build_interpretper_from_model_and_config failed !\n"); + return EXIT_FAILURE; + } + int result = fast_interpreter->init(); + if(result != EXIT_SUCCESS){ + printf("interpreter->init() failed !\n"); + return EXIT_FAILURE; + } + // load model + fast_interpreter->load_model(); + if(result != EXIT_SUCCESS){ + printf("interpreter->load_model() failed !\n"); + return EXIT_FAILURE; + } + printf("detect model load success!\n"); + + cv::Mat frame = cv::imread(args.imgs); + if (frame.empty()) { + printf("detect image load failed!\n"); + return 1; + } + printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows); + cv::Mat input_data; + cv::Mat frame_clone = frame.clone(); + cv::Scalar stds_scale(58.395, 57.12, 57.375); + cv::Scalar means_scale(123.675, 116.28, 103.53); + cv::resize(frame_clone, frame_clone, cv::Size(model_w, model_h)); + frame_clone.convertTo(input_data, CV_32F); + cv::subtract(input_data, means_scale, input_data); + cv::divide(input_data, stds_scale, input_data); + + float *outdata0 = nullptr; + std::vector invoke_time; + for (int i = 0; i < args.invoke_nums; ++i) { + result = fast_interpreter->set_input_tensor(0, input_data.data); + if(result != EXIT_SUCCESS){ + printf("interpreter->set_input_tensor() failed !\n"); + return EXIT_FAILURE; + } + auto t1 = std::chrono::high_resolution_clock::now(); + result = fast_interpreter->invoke(); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration cost_time = t2 - t1; + invoke_time.push_back(cost_time.count() * 1000); + if(result != EXIT_SUCCESS){ + printf("interpreter->invoke() failed !\n"); + return EXIT_FAILURE; + } + uint32_t out_data_0 = 0; + result = fast_interpreter->get_output_tensor(0, (void**)&outdata0, &out_data_0); + if(result != EXIT_SUCCESS){ + printf("interpreter->get_output_tensor() 1 failed !\n"); + return EXIT_FAILURE; + } + + } + + float max_invoke_time = *std::max_element(invoke_time.begin(), invoke_time.end()); + float min_invoke_time = *std::min_element(invoke_time.begin(), invoke_time.end()); + float mean_invoke_time = std::accumulate(invoke_time.begin(), invoke_time.end(), 0.0f) / args.invoke_nums; + float var_invoketime = 0.0f; + for (auto time : 
invoke_time) { + var_invoketime += (time - mean_invoke_time) * (time - mean_invoke_time); + } + var_invoketime /= args.invoke_nums; + printf("=======================================\n"); + printf("QNN inference %d times :\n --mean_invoke_time is %f \n --max_invoke_time is %f \n --min_invoke_time is %f \n --var_invoketime is %f\n", + args.invoke_nums, mean_invoke_time, max_invoke_time, min_invoke_time, var_invoketime); + printf("=======================================\n"); + + cv::Mat img = post_process(frame, outdata0); + cv::imwrite("./results.png", img); + fast_interpreter->destory(); + return 0; +} + + +int main(int argc, char* argv[]) { + Args args = parse_args(argc, argv); + return invoke(args); +} diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin new file mode 100644 index 0000000000000000000000000000000000000000..262febd7d807c4295436ab7949dce159f7ec8248 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280ea671091e591e615d5378b33d7839622c4467db70c4ff068b4fd903901fdf +size 18418864 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png new file mode 100644 index 0000000000000000000000000000000000000000..1a7d292c6a07cd739952b6219e069c47751be862 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202d18312933ddf6782805ee1be485b149f95d15c087e31d10b6510fcdea6101 +size 2281350 diff --git a/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..f0121a28b8a07382c6d402ef086c58e8019d35c3 --- /dev/null +++ b/model_farm_ffnet54s_qcs8550_qnn2.16_w8a16_aidlite/python/demo_qnn.py @@ -0,0 +1,133 @@ +import numpy as np +import torch +import cv2 +import sys +import time +import aidlite +import os + + +def decode_segmap(image, nc=19): + label_colors = np.array([(0, 0, 0), # 0=background + # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle + (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), + # 6=bus, 7=car, 8=cat, 9=chair, 10=cow + (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), + # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person + (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128), + # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor + (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)]) + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + for l in range(0, nc): + idx = image == l + r[idx] = label_colors[l, 0] + g[idx] = label_colors[l, 1] + b[idx] = label_colors[l, 2] + rgb = np.stack([r, g, b], axis=2) + return rgb + + + +class ffnet54sQnn: + def __init__(self): + super().__init__() + self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/ffnet54S_dBBB_cityscapes_state_dict_quarts_w8a16.qnn216.ctx.bin")) + if self.model is None: + print("Create model failed !") 
+ return + + self.config = aidlite.Config.create_instance() + if self.config is None: + print("build_interpretper_from_model_and_config failed !") + return + + self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL + self.config.framework_type = aidlite.FrameworkType.TYPE_QNN + self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP + # self.config.accelerate_type = aidlite.AccelerateType.TYPE_CPU + self.config.is_quantify_model = 1 + + self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config) + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + return + input_shapes = [[1,512,1024,3]] + output_shapes = [[1,64,128,19]] + self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32, + output_shapes, aidlite.DataType.TYPE_FLOAT32) + + if self.interpreter is None: + print("build_interpretper_from_model_and_config failed !") + result = self.interpreter.init() + if result != 0: + print(f"interpreter init failed !") + result = self.interpreter.load_model() + if result != 0: + print("interpreter load model failed !") + + print(" model load success!") + + def __call__(self, input): + self.interpreter.set_input_tensor(0,input) + invoke_time=[] + invoke_nums =10 + for i in range(invoke_nums): + result = self.interpreter.set_input_tensor(0, input.data) + if result != 0: + print("interpreter set_input_tensor() failed") + t1=time.time() + result = self.interpreter.invoke() + cost_time = (time.time()-t1)*1000 + invoke_time.append(cost_time) + + max_invoke_time = max(invoke_time) + min_invoke_time = min(invoke_time) + mean_invoke_time = sum(invoke_time)/invoke_nums + var_invoketime=np.var(invoke_time) + print("====================================") + print(f"QNN invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}") + print("====================================") + features_0 = self.interpreter.get_output_tensor(0).reshape(1, 64,128,19).copy() + return features_0 + + + + + +ffnet_segm = ffnet54sQnn() + +frame_ct=0 +image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"2.png") + +image = cv2.imread(image_path) +image=cv2.resize(image,(1024,512)) +frame = np.ascontiguousarray(image[:,:,::-1]) + +mean_data=[123.675, 116.28, 103.53] +std_data=[58.395, 57.12, 57.375] +img_input = (frame-mean_data)/std_data # HWC +input = img_input.astype(np.float32) +input = input[np.newaxis, ...] +input_size = input.shape[1], input.shape[2] #H w +t0 = time.time() +out = ffnet_segm(input) +use_time = round((time.time() - t0) * 1000, 2) +print(f"pose detction inference_time:{use_time} ms") +out = np.transpose(out, (0, 3, 1,2)) +out = torch.from_numpy(out) + +output = torch.nn.functional.interpolate( + out, size=input_size, mode="bilinear", align_corners=False + ) +output_data = torch.nn.functional.softmax(output, dim=1).data +max_probs, predictions = output_data.max(1) + + +prediction = predictions.numpy().astype(np.uint8) +test = decode_segmap( prediction[0]) + +cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), test[:,:,::-1]) +ffnet_segm.interpreter.destory() +
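Both demo_qnn.py variants above use torch only for the interpolate/softmax/argmax post-processing of the 1x64x128x19 QNN output. For reference, the same steps can be reproduced with NumPy and OpenCV alone; the sketch below is illustrative (the function name and the synthetic input are assumptions, not part of the AidLite API) and mirrors the demos' 512x1024 target resolution. The resulting label map can be passed to the decode_segmap helper from the demos to colorize it.

```python
import numpy as np
import cv2


def postprocess_numpy(out_nhwc, target_hw=(512, 1024)):
    """Label map and per-pixel confidence from a 1xHxWx19 output, without torch."""
    # NHWC -> CHW for the single image in the batch (matches np.transpose(out, (0, 3, 1, 2))).
    logits = np.transpose(out_nhwc[0], (2, 0, 1)).astype(np.float32)  # 19 x 64 x 128
    num_classes = logits.shape[0]
    # Upsample each class plane to the display resolution; cv2.INTER_LINEAR uses the same
    # half-pixel convention as torch interpolate(..., align_corners=False).
    up = np.stack([
        cv2.resize(logits[c], (target_hw[1], target_hw[0]), interpolation=cv2.INTER_LINEAR)
        for c in range(num_classes)
    ])
    # Numerically stable softmax over the class axis; the argmax itself is unchanged by
    # softmax, the probabilities only mirror the demo's max_probs output.
    up -= up.max(axis=0, keepdims=True)
    probs = np.exp(up)
    probs /= probs.sum(axis=0, keepdims=True)
    pred = probs.argmax(axis=0).astype(np.uint8)  # H x W label map, values 0..18
    return pred, probs.max(axis=0)


if __name__ == "__main__":
    fake_out = np.random.rand(1, 64, 128, 19).astype(np.float32)  # stand-in for the interpreter output
    pred, conf = postprocess_numpy(fake_out)
    print(pred.shape, conf.shape)  # (512, 1024) (512, 1024)
```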