Commit c690051 (verified) by lonalala · 1 Parent(s): 35cb9a2

Upload 16 files

model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/README.md CHANGED
## Model Information
### Source model

- Input shape: 640x640
- Number of parameters: 1.968M
- Model size: 7.56 MB
- Output shape: 1x25200x85

Source model repository: [yolov5](https://github.com/ultralytics/yolov5)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation
Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install the AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the AidLite build that matches the backend above,
# e.g. for QNN 2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify the AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite; print(aidlite.get_py_library_version())"
```

### Run the Python demo

```bash
cd python
python3 demo_qnn.py
```

### Run the C++ demo

```bash
cd yolov5n/model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp
mkdir build
cd build
cmake ..
make
./run_yolov5
```
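
For reference, the core inference flow inside `python/demo_qnn.py` reduces to the sketch below; the model path, image path, and preprocessing are copied from the demo's defaults and are assumptions that may need adjusting for your setup. The 1x25200x85 output corresponds to the three detection heads concatenated: 3 anchors x (80² + 40² + 20²) grid cells = 25200 candidate boxes, each carrying 4 box coordinates, 1 objectness score, and 80 class scores.

```python
import cv2
import numpy as np
import aidlite

MODEL_SIZE = 640
# Paths are assumptions based on the demo's defaults; adjust for your setup.
model_path = "../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin"
image_path = "bus.jpg"

# Local QNN runtime on the DSP, with a quantized model (mirrors demo_qnn.py)
config = aidlite.Config.create_instance()
config.implement_type = aidlite.ImplementType.TYPE_LOCAL
config.framework_type = aidlite.FrameworkType.TYPE_QNN
config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
config.is_quantify_model = 1

# One 1x640x640x3 float32 input, three YOLOv5 detection heads as outputs
model = aidlite.Model.create_instance(model_path)
model.set_model_properties([[1, MODEL_SIZE, MODEL_SIZE, 3]], aidlite.DataType.TYPE_FLOAT32,
                           [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]],
                           aidlite.DataType.TYPE_FLOAT32)

# Note: "interpretper" and "destory" are the SDK's own spellings
interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
interpreter.init()
interpreter.load_model()

# Letterbox-style preprocessing: pad to a square, resize to 640x640, scale to [0, 1]
frame = cv2.imread(image_path)
length = max(frame.shape[:2])
square = np.zeros((length, length, 3), np.uint8)
square[:frame.shape[0], :frame.shape[1]] = frame
img = cv2.resize(cv2.cvtColor(square, cv2.COLOR_BGR2RGB), (MODEL_SIZE, MODEL_SIZE))
img = (img / 255.0).astype(np.float32)

interpreter.set_input_tensor(0, img.data)
interpreter.invoke()
heads = [interpreter.get_output_tensor(i) for i in range(3)]  # flat arrays, one per stride
interpreter.destory()
```

The raw head tensors still need the YOLOv5 decode and NMS steps; the `Detect` class and `detect_postprocess()` in `demo_qnn.py` show the full post-processing.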
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt CHANGED
cmake_minimum_required(VERSION 3.12)
project(aidlite_cpp_samples)

set(CMAKE_BUILD_TYPE Release)

set(OPENCV_INCLUDE_DIR /usr/include/opencv4)
set(OPENCV_LINK_DIR "")
set(OPENCV_LIBS opencv_imgcodecs opencv_imgproc opencv_core) # note: if these are static libraries, the link order matters

set(AIDLITE_INCLUDE_DIR /usr/local/include)
set(AIDLITE_LINK_DIR /usr/local/lib)
set(AIDLITE_LIB aidlite)

function(func_generate_sample_exe sample_name)

set(demo_name ${sample_name})

file(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/${demo_name}.cpp)

add_executable(${demo_name} ${src_files})

target_compile_options(${demo_name} PRIVATE -std=c++11)
target_include_directories(${demo_name} PUBLIC ${OPENCV_INCLUDE_DIR} ${AIDLITE_INCLUDE_DIR})
target_link_directories(${demo_name} PUBLIC ${OPENCV_LINK_DIR} ${AIDLITE_LINK_DIR})
target_link_libraries(${demo_name} PUBLIC ${AIDLITE_LIB} ${OPENCV_LIBS} pthread)
message(STATUS "[CMAKEMSG] ${demo_name} needed libraries: ${AIDLITE_LIB} ${OPENCV_LIBS}")

endfunction()

set(SAMPLE_LIST run_yolov5)

FOREACH(sample ${SAMPLE_LIST})
message("prepare to generate cpp sample : ${sample}")

func_generate_sample_exe(${sample})
ENDFOREACH(sample)
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/cpp/run_yolov5.cpp CHANGED
The old and new versions of this file differ only in the model path (the `.aidem` suffix was dropped):
- std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem";
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin";
The full updated file follows.
1
+ #include <thread>
2
+ #include <future>
3
+ #include <opencv2/opencv.hpp>
4
+ #include "aidlux/aidlite/aidlite.hpp"
5
+
6
+ using namespace Aidlux::Aidlite;
7
+ using namespace std;
8
+
9
+ #define OBJ_CLASS_NUM 80
10
+ #define NMS_THRESH 0.45
11
+ #define BOX_THRESH 0.5
12
+ #define MODEL_SIZE 640
13
+ #define OBJ_NUMB_MAX_SIZE 64
14
+ #define PROP_BOX_SIZE (5+OBJ_CLASS_NUM)
15
+ #define STRIDE8_SIZE (MODEL_SIZE / 8)
16
+ #define STRIDE16_SIZE (MODEL_SIZE / 16)
17
+ #define STRIDE32_SIZE (MODEL_SIZE / 32)
18
+
19
+ const float anchor0[6] = {10, 13, 16, 30, 33, 23};
20
+ const float anchor1[6] = {30, 61, 62, 45, 59, 119};
21
+ const float anchor2[6] = {116, 90, 156, 198, 373, 326};
22
+
23
+ string class_names[] = {
24
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
25
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
26
+ "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
27
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
28
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
29
+ "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
30
+ "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
31
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
32
+
33
+
34
+ static float sigmoid(float x) { return 1.f / (1.f + exp(-x)); }
35
+
36
+ float eqprocess(cv::Mat* src, cv::Mat* dst, int width, int height)
37
+ {
38
+ int w = src->cols;
39
+ int h = src->rows;
40
+ float scale_h = float(h) / float(height);
41
+ float scale_w = float(w) / float(width);
42
+
43
+ float scale;
44
+ if (scale_h > scale_w)
45
+ {
46
+ scale = scale_h;
47
+ }
48
+ else
49
+ {
50
+ scale = scale_w;
51
+ }
52
+
53
+ int rel_width = int(w / scale);
54
+ int rel_height = int(h / scale);
55
+
56
+ cv::Mat tmp = (*dst)(cv::Rect(0, 0, rel_width, rel_height));
57
+ cv::resize(*src, tmp, cv::Size(rel_width, rel_height));
58
+ return scale;
59
+ }
60
+
61
+ std::vector<std::string> split(const std::string& str)
62
+ {
63
+ std::stringstream ss(str);
64
+ std::vector<std::string> elems;
65
+ std::string item;
66
+ while (std::getline(ss, item, ','))
67
+ {
68
+ elems.push_back(item);
69
+ }
70
+ return elems;
71
+ }
72
+
73
+
74
+ int process(float* output, std::vector<float>& boxes, std::vector<float>& objProbs, std::vector<int>& classId, float * anchor, int grid_h, int grid_w, int stride, int imgsz)
75
+ {
76
+ int ct = 0;
77
+ int validCount = 0;
78
+ for (int a = 0; a < 3; a++)
79
+ {
80
+ for (int i = 0; i < grid_h; i++)
81
+ {
82
+ for (int j = 0; j < grid_w; j++)
83
+ {
84
+ int idx = a * PROP_BOX_SIZE + (i * grid_w + j) * 3 * PROP_BOX_SIZE;
85
+ float box_confidence = sigmoid(output[idx + 4]);
86
+ if (box_confidence >= BOX_THRESH )
87
+ {
88
+ float box_x = sigmoid(output[idx]) * 2 - 0.5;
89
+ float box_y = sigmoid(output[idx + 1]) * 2 - 0.5;
90
+ float box_w = pow(sigmoid(output[idx + 2]) * 2, 2);
91
+ float box_h = pow(sigmoid(output[idx + 3]) * 2, 2);
92
+
93
+ box_x = (box_x + j) * (float)stride;
94
+ box_y = (box_y + i) * (float)stride;
95
+ box_w = box_w * anchor[a * 2];
96
+ box_h = box_h * anchor[a * 2 + 1];
97
+
98
+ box_x -= (box_w / 2.0);
99
+ box_y -= (box_h / 2.0);
100
+
101
+ float maxClassProbs = 0;
102
+ int maxClassId = 0;
103
+
104
+ for(int k = 0; k < OBJ_CLASS_NUM ; k++)
105
+ {
106
+ float prob = output[idx + 5 + k];
107
+ if (prob > maxClassProbs)
108
+ {
109
+ maxClassId = k;
110
+ maxClassProbs = prob;
111
+ }
112
+ }
113
+ if (maxClassProbs > BOX_THRESH)
114
+ {
115
+ objProbs.push_back(sigmoid(maxClassProbs) * box_confidence);
116
+ classId.push_back(maxClassId);
117
+ validCount++;
118
+ boxes.push_back(box_x);
119
+ boxes.push_back(box_y);
120
+ boxes.push_back(box_w);
121
+ boxes.push_back(box_h);
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ return validCount;
129
+ }
130
+
131
+
132
+ static int quick_sort_indice_inverse(std::vector<float>& input, int left, int right, std::vector<int>& indices)
133
+ {
134
+ float key;
135
+ int key_index;
136
+ int low = left;
137
+ int high = right;
138
+ if (left < right) {
139
+ key_index = indices[left];
140
+ key = input[left];
141
+ while (low < high) {
142
+ while (low < high && input[high] <= key) {
143
+ high--;
144
+ }
145
+ input[low] = input[high];
146
+ indices[low] = indices[high];
147
+ while (low < high && input[low] >= key) {
148
+ low++;
149
+ }
150
+ input[high] = input[low];
151
+ indices[high] = indices[low];
152
+ }
153
+ input[low] = key;
154
+ indices[low] = key_index;
155
+ quick_sort_indice_inverse(input, left, low - 1, indices);
156
+ quick_sort_indice_inverse(input, low + 1, right, indices);
157
+ }
158
+ return low;
159
+ }
160
+
161
+ static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
162
+ float ymax1)
163
+ {
164
+ float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
165
+ float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
166
+ float i = w * h;
167
+ float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
168
+ return u <= 0.f ? 0.f : (i / u);
169
+ }
170
+
171
+
172
+ static int nms(int validCount, std::vector<float>& outputLocations, std::vector<int> classIds, std::vector<int>& order,
173
+ int filterId, float threshold)
174
+ {
175
+ for (int i = 0; i < validCount; ++i) {
176
+ if (order[i] == -1 || classIds[i] != filterId) {
177
+ continue;
178
+ }
179
+ int n = order[i];
180
+ for (int j = i + 1; j < validCount; ++j) {
181
+ int m = order[j];
182
+ if (m == -1 || classIds[i] != filterId) {
183
+ continue;
184
+ }
185
+ float xmin0 = outputLocations[n * 4 + 0];
186
+ float ymin0 = outputLocations[n * 4 + 1];
187
+ float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
188
+ float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
189
+
190
+ float xmin1 = outputLocations[m * 4 + 0];
191
+ float ymin1 = outputLocations[m * 4 + 1];
192
+ float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
193
+ float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
194
+
195
+ float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
196
+
197
+ if (iou > threshold) {
198
+ order[j] = -1;
199
+ }
200
+ }
201
+ }
202
+ return 0;
203
+ }
204
+
205
+ int32_t thread_func(int thread_idx){
206
+
207
+ printf("entry thread_func[%d]\n", thread_idx);
208
+
209
+ std::string image_path = "../bus.jpg";
210
+ std::string save_name = "out_yolov5_qnn";
211
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin";
212
+
213
+ // image process
214
+ cv::Mat frame = cv::imread(image_path);
215
+ cv::cvtColor(frame, frame , cv::COLOR_BGR2RGB);
216
+ cv::Scalar stds_scale(255, 255, 255);
217
+ cv::Size target_shape(MODEL_SIZE, MODEL_SIZE);
218
+
219
+ cv::Mat frame_resized = cv::Mat::zeros(MODEL_SIZE, MODEL_SIZE, CV_8UC3);
220
+ float scale = eqprocess(&frame, &frame_resized, MODEL_SIZE, MODEL_SIZE);
221
+
222
+ cv::Mat input_data;
223
+ frame_resized.convertTo(input_data, CV_32FC3);
224
+ cv::divide(input_data, stds_scale, input_data);
225
+
226
+ // model init
227
+ printf("Aidlite library version : %s\n", Aidlux::Aidlite::get_library_version().c_str());
228
+
229
+ // Call the following three logging interfaces in any combination as needed. If none of them is called, only error logs are printed to stderr by default.
230
+ Aidlux::Aidlite::set_log_level(Aidlux::Aidlite::LogLevel::INFO);
231
+ Aidlux::Aidlite::log_to_stderr();
232
+ // Aidlux::Aidlite::log_to_file("./qnn_yolov5_multi_");
233
+
234
+ Model* model = Model::create_instance(model_path);
235
+ if(model == nullptr){
236
+ printf("Create Model object failed !\n");
237
+ return EXIT_FAILURE;
238
+ }
239
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,640,640,3}};
240
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,40,40,255}, {1,20,20,255}, {1,80,80,255}};
241
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
242
+
243
+ Config* config = Config::create_instance();
244
+ if(config == nullptr){
245
+ printf("Create Config object failed !\n");
246
+ return EXIT_FAILURE;
247
+ }
248
+
249
+ config->implement_type = ImplementType::TYPE_LOCAL;
250
+ config->framework_type = FrameworkType::TYPE_QNN216;
251
+ config->accelerate_type = AccelerateType::TYPE_DSP;
252
+
253
+ std::unique_ptr<Interpreter>&& fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
254
+ if(fast_interpreter == nullptr){
255
+ printf("build_interpretper_from_model_and_config failed !\n");
256
+ return EXIT_FAILURE;
257
+ }
258
+
259
+ int result = fast_interpreter->init();
260
+ if(result != EXIT_SUCCESS){
261
+ printf("interpreter->init() failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+
265
+ result = fast_interpreter->load_model();
266
+ if(result != EXIT_SUCCESS){
267
+ printf("interpreter->load_model() failed !\n");
268
+ return EXIT_FAILURE;
269
+ }
270
+
271
+ printf("load model load success!\n");
272
+
273
+ float* stride8 = nullptr;
274
+ float* stride16 = nullptr;
275
+ float* stride32 = nullptr;
276
+
277
+ // post_process
278
+ std::vector<float> filterBoxes;
279
+ std::vector<float> objProbs;
280
+ std::vector<int> classId;
281
+
282
+ double sum_time_0 = 0.0, sum_time_1 = 0.0, sum_time_2 = 0.0;
283
+ int _counter = 10;
284
+ for(int idx = 0; idx < _counter; ++idx){
285
+ std::chrono::steady_clock::time_point st0 = std::chrono::steady_clock::now();
286
+
287
+ void* input_tensor_data = (void*)input_data.data;
288
+ result = fast_interpreter->set_input_tensor(0,input_tensor_data);
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->set_input_tensor() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+
294
+ std::chrono::steady_clock::time_point et0 = std::chrono::steady_clock::now();
295
+ std::chrono::steady_clock::duration dur0 = et0 - st0;
296
+ printf("current thread_idx[%d] [%d] set_input_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur0).count()*1000);
297
+ sum_time_0 += std::chrono::duration<double>(dur0).count()*1000;
298
+
299
+ std::chrono::steady_clock::time_point st1 = std::chrono::steady_clock::now();
300
+
301
+ result = fast_interpreter->invoke();
302
+ if(result != EXIT_SUCCESS){
303
+ printf("interpreter->invoke() failed !\n");
304
+ return EXIT_FAILURE;
305
+ }
306
+
307
+ std::chrono::steady_clock::time_point et1 = std::chrono::steady_clock::now();
308
+ std::chrono::steady_clock::duration dur1 = et1 - st1;
309
+ printf("current thread_idx[%d] [%d] invoke cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur1).count()*1000);
310
+ sum_time_1 += std::chrono::duration<double>(dur1).count()*1000;
311
+
312
+ std::chrono::steady_clock::time_point st2 = std::chrono::steady_clock::now();
313
+
314
+ uint32_t output_tensor_length_0 = 0;
315
+ result = fast_interpreter->get_output_tensor(0, (void**)&stride8, &output_tensor_length_0);
316
+ if(result != EXIT_SUCCESS){
317
+ printf("interpreter->get_output_tensor() 0 failed !\n");
318
+ return EXIT_FAILURE;
319
+ }
320
+ printf("sample : interpreter->get_output_tensor() 0 length is [%d] !\n", output_tensor_length_0);
321
+
322
+ uint32_t output_tensor_length_1 = 0;
323
+ result = fast_interpreter->get_output_tensor(1, (void**)&stride16, &output_tensor_length_1);
324
+ if(result != EXIT_SUCCESS){
325
+ printf("interpreter->get_output_tensor() 1 failed !\n");
326
+ return EXIT_FAILURE;
327
+ }
328
+ printf("sample : interpreter->get_output_tensor() 1 length is [%d] !\n", output_tensor_length_1);
329
+
330
+ uint32_t output_tensor_length_2 = 0;
331
+ result = fast_interpreter->get_output_tensor(2, (void**)&stride32, &output_tensor_length_2);
332
+ if(result != EXIT_SUCCESS){
333
+ printf("interpreter->get_output_tensor() 2 failed !\n");
334
+ return EXIT_FAILURE;
335
+ }
336
+ printf("sample : interpreter->get_output_tensor() 2 length is [%d] !\n", output_tensor_length_2);
337
+
338
+ std::chrono::steady_clock::time_point et2 = std::chrono::steady_clock::now();
339
+ std::chrono::steady_clock::duration dur2 = et2 - st2;
340
+ printf("current thread_idx[%d] [%d] get_output_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur2).count()*1000);
341
+ sum_time_2 += std::chrono::duration<double>(dur2).count()*1000;
342
+ }
343
+ printf("repeat [%d] time , input[%f] --- invoke[%f] --- output[%f] --- sum[%f]ms\n", _counter, sum_time_0, sum_time_1, sum_time_2, sum_time_0+sum_time_1+sum_time_2);
344
+
345
+ std::chrono::steady_clock::time_point pps = std::chrono::steady_clock::now();
346
+
347
+ filterBoxes.clear();
348
+ objProbs.clear();
349
+ classId.clear();
350
+ int validCount0 = process(stride8, filterBoxes, objProbs, classId, (float*)anchor0, STRIDE8_SIZE, STRIDE8_SIZE, 8, MODEL_SIZE);
351
+ int validCount1 = process(stride16, filterBoxes, objProbs, classId, (float*)anchor1, STRIDE16_SIZE, STRIDE16_SIZE, 16, MODEL_SIZE);
352
+ int validCount2 = process(stride32, filterBoxes, objProbs, classId, (float*)anchor2, STRIDE32_SIZE, STRIDE32_SIZE, 32, MODEL_SIZE);
353
+
354
+ int validCount = validCount0 + validCount1 +validCount2;
355
+
356
+ std::vector<int> indexArray;
357
+ for (int i = 0; i < validCount; ++i){
358
+ indexArray.push_back(i);
359
+ }
360
+
361
+ quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
362
+
363
+ std::set<int> class_set(std::begin(classId), std::end(classId));
364
+
365
+ for (auto c : class_set) {
366
+ nms(validCount, filterBoxes, classId, indexArray, c, NMS_THRESH);
367
+ }
368
+
369
+ std::chrono::steady_clock::time_point ppe = std::chrono::steady_clock::now();
370
+ std::chrono::steady_clock::duration durpp = ppe - pps;
371
+ printf("postprocess cost time : %f ms\n", std::chrono::duration<double>(durpp).count()*1000);
372
+
373
+ // Reference boxes come from SNPE2 FP32 CPU inference results; [x1, y1, x2, y2] coordinates are rounded down
374
+ const float expected_box_0[3][4] = {{210, 241, 285, 519}, {473, 229, 560, 522}, {108, 231, 231, 542}};
375
+ const float expected_box_5[1][4] = {{91, 131, 551, 464}};
376
+
377
+ unsigned int box_count = 0;
378
+ unsigned int verify_pass_count = 0;
379
+ for (int i = 0; i < validCount; ++i) {
380
+
381
+ if (indexArray[i] == -1) {
382
+ continue;
383
+ }
384
+ int n = indexArray[i];
385
+
386
+ float x1 = filterBoxes[n * 4 + 0] * scale;
387
+ float y1 = filterBoxes[n * 4 + 1] * scale;
388
+ float x2 = x1 + filterBoxes[n * 4 + 2] * scale;
389
+ float y2 = y1 + filterBoxes[n * 4 + 3] * scale;
390
+ int id = classId[n];
391
+ float obj_conf = objProbs[i];
392
+
393
+ // string show_info = "class " + to_string(id) + ": " + to_string(obj_conf);
394
+ string show_info = class_names[id] + ": " + to_string(obj_conf);
395
+ cv::putText(frame, show_info.c_str(), cv::Point(x1, y1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2, 2); // color-BGR
396
+ cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2, 2, 0);
397
+
398
+ // Verify the correctness of the results
399
+ printf("Result id[%d]-x1[%f]-y1[%f]-x2[%f]-y2[%f]\n", id, x1, y1, x2, y2);
400
+
401
+ ++box_count;
402
+ if(id == 0){
403
+ for(int idx = 0; idx < 3; ++idx){
404
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
405
+ expected_box_0[idx][0], expected_box_0[idx][1], expected_box_0[idx][2], expected_box_0[idx][3]);
406
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
407
+ if(coverage_ratio > 0.9){
408
+ ++verify_pass_count;
409
+ break;
410
+ }
411
+ }
412
+ }else if(id == 5){
413
+ for(int idx = 0; idx < 1; ++idx){
414
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
415
+ expected_box_5[idx][0], expected_box_5[idx][1], expected_box_5[idx][2], expected_box_5[idx][3]);
416
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
417
+ if(coverage_ratio > 0.9){
418
+ ++verify_pass_count;
419
+ break;
420
+ }
421
+ }
422
+ }else{
423
+ printf("ERROR : The Yolov5s model inference result is not the expected classification category.\n");
424
+ return EXIT_FAILURE;
425
+ }
426
+ }
427
+
428
+ // Save the result image
429
+ cv::cvtColor(frame, frame , cv::COLOR_RGB2BGR);
430
+ cv::imwrite("result.jpg", frame);
431
+
432
+ result = fast_interpreter->destory();
433
+ if(result != EXIT_SUCCESS){
434
+ printf("interpreter->destory() failed !\n");
435
+ return EXIT_FAILURE;
436
+ }
437
+
438
+ printf("exit thread_func[%d]\n", thread_idx);
439
+
440
+ return EXIT_SUCCESS;
441
+ }
442
+
443
+ int main(int argc, char** args)
444
+ {
445
+
446
+ std::future<int> thread_01_result = std::async(std::launch::async, thread_func, 1);
447
+
448
+ if(EXIT_SUCCESS != thread_01_result.get()){
449
+ printf("ERROR : thread_01 run failed.\n");
450
+ return EXIT_FAILURE;
451
+ }
452
+
453
+ printf("Exit main function .\n");
454
+ return 0;
455
  }
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/models/cutoff_yolov5n_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8e9700e6f44fc35b2bc8d6bd459d3acda007e4dc09bd92ab450ac1b0f358cb
3
+ size 2124248
model_farm_yolov5n_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py CHANGED
@@ -1,338 +1,338 @@
1
- import time
2
- import numpy as np
3
- import cv2
4
- import aidlite
5
- import argparse
6
- import os
7
- OBJ_CLASS_NUM = 80
8
- NMS_THRESH = 0.45
9
- BOX_THRESH = 0.5
10
- MODEL_SIZE = 640
11
-
12
- OBJ_NUMB_MAX_SIZE = 64
13
- PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
- STRIDE8_SIZE = (MODEL_SIZE / 8)
15
- STRIDE16_SIZE = (MODEL_SIZE / 16)
16
- STRIDE32_SIZE = (MODEL_SIZE / 32)
17
-
18
- anchors = [[10, 13, 16, 30, 33, 23],
19
- [30, 61, 62, 45, 59, 119],
20
- [116, 90, 156, 198, 373, 326]]
21
-
22
- current_p =os.path.dirname(os.path.abspath(__file__))
23
-
24
- coco_class = [
25
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
- 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
- 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
- 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
- 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
- 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
- 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
- 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
-
34
-
35
- def eqprocess(image, size1, size2):
36
- h, w, _ = image.shape
37
- mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
- scale1 = h / size1
39
- scale2 = w / size2
40
- if scale1 > scale2:
41
- scale = scale1
42
- else:
43
- scale = scale2
44
- img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
- mask[:int(h / scale), :int(w / scale), :] = img
46
- return mask, scale
47
-
48
-
49
- def xywh2xyxy(x):
50
- '''
51
- Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
- '''
53
- y = np.copy(x)
54
- y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
- y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
- y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
- y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
- return y
59
-
60
-
61
- def xyxy2xywh(box):
62
- '''
63
- Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
- '''
65
- box[:, 2:] = box[:, 2:] - box[:, :2]
66
- return box
67
-
68
-
69
- def NMS(dets, scores, thresh):
70
- '''
71
- Single-class NMS algorithm
72
- dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
- '''
74
- x1 = dets[:, 0]
75
- y1 = dets[:, 1]
76
- x2 = dets[:, 2]
77
- y2 = dets[:, 3]
78
- areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
- keep = []
80
- index = scores.argsort()[::-1]
81
- while index.size > 0:
82
- i = index[0] # the first index always has the biggest score, so keep it directly
83
- keep.append(i)
84
- x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
- y11 = np.maximum(y1[i], y1[index[1:]])
86
- x22 = np.minimum(x2[i], x2[index[1:]])
87
- y22 = np.minimum(y2[i], y2[index[1:]])
88
- w = np.maximum(0, x22 - x11 + 1) # the width of the overlap
89
- h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
- overlaps = w * h
91
- ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
- idx = np.where(ious <= thresh)[0]
93
- index = index[idx + 1] # offset by 1 because ious was computed against index[1:]
94
-
95
- return keep
96
-
97
-
98
- def clip_coords(boxes, img_shape):
99
- # Clip bounding xyxy bounding boxes to image shape (height, width)
100
- boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
- boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
- boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
- boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
-
105
-
106
- def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
- '''
108
- Post-process the detection outputs
109
- prediction: raw predictions from the aidlite model
110
- img0shape: original image shape
111
- img1shape: model input image shape
112
- conf_thres: confidence threshold
113
- iou_thres: IoU threshold
114
- return: list[np.ndarray(N, 5)], per-class box info as xywh plus conf
115
- '''
116
- h, w, _ = img1shape
117
- valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
- valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
- valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
-
121
- max_det = 300
122
- max_wh = 7680
123
- max_nms = 30000
124
- valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
- valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
- sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
- valid_condidates = valid_condidates[sort_id[:max_nms]]
128
- boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
- index = NMS(boxes, scores, iou_thres)[:max_det]
130
- out_boxes = valid_condidates[index]
131
- clip_coords(out_boxes[:, :4], img0shape)
132
- out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
- print("检测到{}个区域".format(len(out_boxes)))
134
- return out_boxes
135
-
136
-
137
- def draw_detect_res(img, det_pred):
138
- '''
139
- Draw the detection results
140
- '''
141
- img = img.astype(np.uint8)
142
- color_step = int(255 / len(coco_class))
143
- for i in range(len(det_pred)):
144
- x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
- score = det_pred[i][4]
146
- cls_id = int(det_pred[i][5])
147
-
148
- print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
-
150
- cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
- cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
- thickness=2)
153
-
154
- return img
155
-
156
-
157
- class Detect():
158
- # YOLOv5 Detect head for detection models
159
- def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
- super().__init__()
161
- self.nc = nc # number of classes
162
- self.no = nc + 5 # number of outputs per anchor
163
- self.stride = stride
164
- self.nl = len(anchors) # number of detection layers
165
- self.na = len(anchors[0]) // 2 # number of anchors
166
- self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
- self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
-
169
- base_scale = image_size // 8
170
- for i in range(self.nl):
171
- self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
-
173
- def _make_grid(self, nx=20, ny=20, i=0):
174
- y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
- yv, xv = np.meshgrid(y, x)
176
- yv, xv = yv.T, xv.T
177
- # add grid offset, i.e. y = 2.0 * x - 0.5
178
- grid = np.stack((xv, yv), 2)
179
- grid = grid[np.newaxis, np.newaxis, ...]
180
- grid = np.repeat(grid, self.na, axis=1) - 0.5
181
- anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
- anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
- anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
- return grid, anchor_grid
185
-
186
- def sigmoid(self, arr):
187
- return 1 / (1 + np.exp(-arr))
188
-
189
- def __call__(self, x):
190
- z = [] # inference output
191
- for i in range(self.nl):
192
- bs, _, ny, nx = x[i].shape
193
- x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
- y = self.sigmoid(x[i])
195
- y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
- y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
- z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
-
199
- return np.concatenate(z, 1)
200
-
201
- def main():
202
- args = parser_args()
203
- target_model = args.target_model
204
- model_type = args.model_type
205
- size = int(args.size)
206
- imgs = args.imgs
207
- invoke_nums = int(args.invoke_nums)
208
- print("Start main ... ...")
209
- # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
- # aidlite.log_to_stderr()
211
- # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
- # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
-
214
- config = aidlite.Config.create_instance()
215
- if config is None:
216
- print("Create config failed !")
217
- return False
218
-
219
-
220
- config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
- if model_type.lower()=="qnn":
222
- config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
- elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
- config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
-
226
- config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
- config.is_quantify_model = 1
228
-
229
-
230
- model = aidlite.Model.create_instance(target_model)
231
- if model is None:
232
- print("Create model failed !")
233
- return False
234
- input_shapes = [[1, size, size, 3]]
235
- output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
- model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
- output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
-
239
- interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
- if interpreter is None:
241
- print("build_interpretper_from_model_and_config failed !")
242
- return None
243
- result = interpreter.init()
244
- if result != 0:
245
- print(f"interpreter init failed !")
246
- return False
247
- result = interpreter.load_model()
248
- if result != 0:
249
- print("interpreter load model failed !")
250
- return False
251
- print("detect model load success!")
252
-
253
- # image process
254
- frame = cv2.imread(imgs)
255
- # 图片做等比缩放
256
- img_processed = np.copy(frame)
257
- [height, width, _] = img_processed.shape
258
- length = max((height, width))
259
- scale = length / size
260
- ratio=[scale,scale]
261
- image = np.zeros((length, length, 3), np.uint8)
262
- image[0:height, 0:width] = img_processed
263
- img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
- img_input=cv2.resize(img_input,(size,size))
265
-
266
- mean_data=[0, 0, 0]
267
- std_data=[255, 255, 255]
268
- img_input = (img_input-mean_data)/std_data # HWC
269
-
270
- img_input = img_input.astype(np.float32)
271
-
272
-
273
- # qnn run
274
- invoke_time=[]
275
- for i in range(invoke_nums):
276
- result = interpreter.set_input_tensor(0, img_input.data)
277
- if result != 0:
278
- print("interpreter set_input_tensor() failed")
279
-
280
- t1=time.time()
281
- result = interpreter.invoke()
282
- cost_time = (time.time()-t1)*1000
283
- invoke_time.append(cost_time)
284
-
285
- if result != 0:
286
- print("interpreter set_input_tensor() failed")
287
- stride8 = interpreter.get_output_tensor(0)
288
- stride16 = interpreter.get_output_tensor(1)
289
- stride32 = interpreter.get_output_tensor(2)
290
-
291
-
292
- result = interpreter.destory()
293
-
294
- ## time 统计
295
- max_invoke_time = max(invoke_time)
296
- min_invoke_time = min(invoke_time)
297
- mean_invoke_time = sum(invoke_time)/invoke_nums
298
- var_invoketime=np.var(invoke_time)
299
- print("=======================================")
300
- print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
- print("=======================================")
302
-
303
- ## 后处理
304
- stride = [8, 16, 32]
305
- yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
- validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
- validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
- validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
- pred = yolo_head([validCount0, validCount1, validCount2])
310
- det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
- det_pred[np.isnan(det_pred)] = 0.0
312
- det_pred[:, :4] = det_pred[:, :4] * scale
313
- res_img = draw_detect_res(frame, det_pred)
314
-
315
- save_path=os.path.join(current_p,"result.jpg")
316
- cv2.imwrite(save_path, res_img)
317
- print("图片保存在",save_path)
318
- print("=======================================")
319
-
320
- return True
321
-
322
-
323
-
324
-
325
- image_path = os.path.join(current_p,"bus.jpg")
326
- def parser_args():
327
- parser = argparse.ArgumentParser(description="Run model benchmarks")
328
- parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem'),help="inference model path")
329
- parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
- parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
- parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
- parser.add_argument('--size',type=str,default=640,help="model input size")
333
- args = parser.parse_args()
334
- return args
335
-
336
- if __name__ == "__main__":
337
- main()
338
-
 
1
+ import time
2
+ import numpy as np
3
+ import cv2
4
+ import aidlite
5
+ import argparse
6
+ import os
7
+ OBJ_CLASS_NUM = 80
8
+ NMS_THRESH = 0.45
9
+ BOX_THRESH = 0.5
10
+ MODEL_SIZE = 640
11
+
12
+ OBJ_NUMB_MAX_SIZE = 64
13
+ PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
+ STRIDE8_SIZE = (MODEL_SIZE / 8)
15
+ STRIDE16_SIZE = (MODEL_SIZE / 16)
16
+ STRIDE32_SIZE = (MODEL_SIZE / 32)
17
+
18
+ anchors = [[10, 13, 16, 30, 33, 23],
19
+ [30, 61, 62, 45, 59, 119],
20
+ [116, 90, 156, 198, 373, 326]]
21
+
22
+ current_p =os.path.dirname(os.path.abspath(__file__))
23
+
24
+ coco_class = [
25
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
+ 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
+ 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
+ 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
+ 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
+ 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
+ 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
+
34
+
35
+ def eqprocess(image, size1, size2):
36
+ h, w, _ = image.shape
37
+ mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
+ scale1 = h / size1
39
+ scale2 = w / size2
40
+ if scale1 > scale2:
41
+ scale = scale1
42
+ else:
43
+ scale = scale2
44
+ img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
+ mask[:int(h / scale), :int(w / scale), :] = img
46
+ return mask, scale
47
+
48
+
49
+ def xywh2xyxy(x):
50
+ '''
51
+ Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
+ '''
53
+ y = np.copy(x)
54
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
+ return y
59
+
60
+
61
+ def xyxy2xywh(box):
62
+ '''
63
+ Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
+ '''
65
+ box[:, 2:] = box[:, 2:] - box[:, :2]
66
+ return box
67
+
68
+
69
+ def NMS(dets, scores, thresh):
70
+ '''
71
+ Single-class NMS algorithm
72
+ dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
+ '''
74
+ x1 = dets[:, 0]
75
+ y1 = dets[:, 1]
76
+ x2 = dets[:, 2]
77
+ y2 = dets[:, 3]
78
+ areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
+ keep = []
80
+ index = scores.argsort()[::-1]
81
+ while index.size > 0:
82
+ i = index[0] # the first index always has the biggest score, so keep it directly
83
+ keep.append(i)
84
+ x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
+ y11 = np.maximum(y1[i], y1[index[1:]])
86
+ x22 = np.minimum(x2[i], x2[index[1:]])
87
+ y22 = np.minimum(y2[i], y2[index[1:]])
88
+ w = np.maximum(0, x22 - x11 + 1) # the width of overlap
89
+ h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
+ overlaps = w * h
91
+ ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
+ idx = np.where(ious <= thresh)[0]
93
+ index = index[idx + 1] # offset by 1 because idx indexes into index[1:]
94
+
95
+ return keep
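+ # NMS above is the classic greedy scheme: boxes are visited in descending score order, the
+ # current best box is kept, and any remaining box whose IoU with it exceeds `thresh` is
+ # discarded. The returned indices refer to rows of `dets`.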
96
+
97
+
98
+ def clip_coords(boxes, img_shape):
99
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
100
+ boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
+ boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
+ boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
+ boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
+
105
+
106
+ def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
+ '''
108
+ Post-process the detection output
+ prediction: raw prediction output of the aidlite model
+ img0shape: shape of the original image
+ img1shape: shape of the model input image
+ conf_thres: confidence threshold
+ iou_thres: IOU threshold
+ return: list[np.ndarray(N, 5)], box information for the matching class, xywh and conf
115
+ '''
116
+ h, w, _ = img1shape
117
+ valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
+ valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
+ valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
+
121
+ max_det = 300
122
+ max_wh = 7680
123
+ max_nms = 30000
124
+ valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
+ valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
+ sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
+ valid_condidates = valid_condidates[sort_id[:max_nms]]
128
+ boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
+ index = NMS(boxes, scores, iou_thres)[:max_det]
130
+ out_boxes = valid_condidates[index]
131
+ clip_coords(out_boxes[:, :4], img0shape)
132
+ out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
+ print("检测到{}个区域".format(len(out_boxes)))
134
+ return out_boxes
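+ # detect_postprocess keeps candidates whose objectness exceeds conf_thres, multiplies class
+ # scores by objectness, and offsets each box by class_id * max_wh before NMS so that boxes of
+ # different classes never suppress each other. The first six columns of each returned row are
+ # [x, y, w, h, conf, cls] in the 640x640 letterboxed space; the caller rescales them with `scale`.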
135
+
136
+
137
+ def draw_detect_res(img, det_pred):
138
+ '''
139
+ Draw the detection results
140
+ '''
141
+ img = img.astype(np.uint8)
142
+ color_step = int(255 / len(coco_class))
143
+ for i in range(len(det_pred)):
144
+ x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
+ score = det_pred[i][4]
146
+ cls_id = int(det_pred[i][5])
147
+
148
+ print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
+
150
+ cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
+ cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
+ thickness=2)
153
+
154
+ return img
155
+
156
+
157
+ class Detect():
158
+ # YOLOv5 Detect head for detection models
159
+ def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
+ super().__init__()
161
+ self.nc = nc # number of classes
162
+ self.no = nc + 5 # number of outputs per anchor
163
+ self.stride = stride
164
+ self.nl = len(anchors) # number of detection layers
165
+ self.na = len(anchors[0]) // 2 # number of anchors
166
+ self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
+ self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
+
169
+ base_scale = image_size // 8
170
+ for i in range(self.nl):
171
+ self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
+
173
+ def _make_grid(self, nx=20, ny=20, i=0):
174
+ y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
+ yv, xv = np.meshgrid(y, x)
176
+ yv, xv = yv.T, xv.T
177
+ # add grid offset, i.e. y = 2.0 * x - 0.5
178
+ grid = np.stack((xv, yv), 2)
179
+ grid = grid[np.newaxis, np.newaxis, ...]
180
+ grid = np.repeat(grid, self.na, axis=1) - 0.5
181
+ anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
+ anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
+ anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
+ return grid, anchor_grid
185
+
186
+ def sigmoid(self, arr):
187
+ return 1 / (1 + np.exp(-arr))
188
+
189
+ def __call__(self, x):
190
+ z = [] # inference output
191
+ for i in range(self.nl):
192
+ bs, _, ny, nx = x[i].shape
193
+ x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
+ y = self.sigmoid(x[i])
195
+ y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
+ z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
+
199
+ return np.concatenate(z, 1)
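+ # Detect.__call__ reproduces the YOLOv5 head decode in NumPy: each (1, 255, H, W) feature map
+ # is reshaped to (1, 3, H, W, 85), passed through a sigmoid, and xy/wh are recovered with the
+ # 0.5-offset grid and the per-level anchors. The three levels concatenate to the usual
+ # 1x25200x85 prediction tensor for a 640x640 input.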
200
+
201
+ def main():
202
+ args = parser_args()
203
+ target_model = args.target_model
204
+ model_type = args.model_type
205
+ size = int(args.size)
206
+ imgs = args.imgs
207
+ invoke_nums = int(args.invoke_nums)
208
+ print("Start main ... ...")
209
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
+ # aidlite.log_to_stderr()
211
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
+
214
+ config = aidlite.Config.create_instance()
215
+ if config is None:
216
+ print("Create config failed !")
217
+ return False
218
+
219
+
220
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
+ if model_type.lower()=="qnn":
222
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
+ elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
+
226
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
+ config.is_quantify_model = 1
228
+
229
+
230
+ model = aidlite.Model.create_instance(target_model)
231
+ if model is None:
232
+ print("Create model failed !")
233
+ return False
234
+ input_shapes = [[1, size, size, 3]]
235
+ output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
+
239
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
+ if interpreter is None:
241
+ print("build_interpretper_from_model_and_config failed !")
242
+ return None
243
+ result = interpreter.init()
244
+ if result != 0:
245
+ print(f"interpreter init failed !")
246
+ return False
247
+ result = interpreter.load_model()
248
+ if result != 0:
249
+ print("interpreter load model failed !")
250
+ return False
251
+ print("detect model load success!")
252
+
253
+ # image process
254
+ frame = cv2.imread(imgs)
255
+ # letterbox the image: pad to a square so the aspect ratio is preserved
256
+ img_processed = np.copy(frame)
257
+ [height, width, _] = img_processed.shape
258
+ length = max((height, width))
259
+ scale = length / size
260
+ ratio=[scale,scale]
261
+ image = np.zeros((length, length, 3), np.uint8)
262
+ image[0:height, 0:width] = img_processed
263
+ img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
+ img_input=cv2.resize(img_input,(size,size))
265
+
266
+ mean_data=[0, 0, 0]
267
+ std_data=[255, 255, 255]
268
+ img_input = (img_input-mean_data)/std_data # HWC
269
+
270
+ img_input = img_input.astype(np.float32)
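+ # Preprocessing: the frame is padded to a square, resized to size x size, converted BGR->RGB
+ # and scaled to [0, 1] (mean 0, std 255). The tensor stays in NHWC float32 layout, matching
+ # input_shapes = [[1, size, size, 3]] declared above.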
271
+
272
+
273
+ # qnn run
274
+ invoke_time=[]
275
+ for i in range(invoke_nums):
276
+ result = interpreter.set_input_tensor(0, img_input.data)
277
+ if result != 0:
278
+ print("interpreter set_input_tensor() failed")
279
+
280
+ t1=time.time()
281
+ result = interpreter.invoke()
282
+ cost_time = (time.time()-t1)*1000
283
+ invoke_time.append(cost_time)
284
+
285
+ if result != 0:
286
+ print("interpreter set_input_tensor() failed")
287
+ stride8 = interpreter.get_output_tensor(0)
288
+ stride16 = interpreter.get_output_tensor(1)
289
+ stride32 = interpreter.get_output_tensor(2)
290
+
291
+
292
+ result = interpreter.destory()
293
+
294
+ ## timing statistics
295
+ max_invoke_time = max(invoke_time)
296
+ min_invoke_time = min(invoke_time)
297
+ mean_invoke_time = sum(invoke_time)/invoke_nums
298
+ var_invoketime=np.var(invoke_time)
299
+ print("=======================================")
300
+ print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
+ print("=======================================")
302
+
303
+ ## post-processing
304
+ stride = [8, 16, 32]
305
+ yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
+ validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
+ validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
+ validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
+ pred = yolo_head([validCount0, validCount1, validCount2])
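+ # Each QNN output comes back as a flat float buffer; it is reshaped to its NHWC shape and
+ # transposed to NCHW before the Detect head decodes it. Output tensor 0 is paired with the
+ # 80x80 (stride-8) grid here, tensor 1 with 40x40 and tensor 2 with 20x20.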
310
+ det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
+ det_pred[np.isnan(det_pred)] = 0.0
312
+ det_pred[:, :4] = det_pred[:, :4] * scale
313
+ res_img = draw_detect_res(frame, det_pred)
314
+
315
+ save_path=os.path.join(current_p,"result.jpg")
316
+ cv2.imwrite(save_path, res_img)
317
+ print("图片保存在",save_path)
318
+ print("=======================================")
319
+
320
+ return True
321
+
322
+
323
+
324
+
325
+ image_path = os.path.join(current_p,"bus.jpg")
326
+ def parser_args():
327
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
328
+ parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin'),help="inference model path")
329
+ parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
+ parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
+ parser.add_argument('--size',type=str,default=640,help="model input size")
333
+ args = parser.parse_args()
334
+ return args
335
+
336
+ if __name__ == "__main__":
337
+ main()
338
+
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/README.md CHANGED
@@ -1,60 +1,60 @@
## Model Information
### Source model

- Input shape: 640x640
- Number of parameters: 1.968M
- Model size: 7.56 MB
- Output shape: 1x25200x85

Source model repository: [yolov5](https://github.com/ultralytics/yolov5)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: SNM972

## Inference with AidLite SDK

### SDK installation
Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)

- install AidLite SDK

```bash
# Install the appropriate version of the aidlite sdk
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
# eg: Install QNN 2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
```

- Verify AidLite SDK

```bash
# aidlite sdk c++ check
python3 -c "import aidlite; print(aidlite.get_library_version())"

# aidlite sdk python check
python3 -c "import aidlite; print(aidlite.get_py_library_version())"
```

### Run python demo

```bash
cd python
python3 demo_qnn.py
```
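
`demo_qnn.py` defaults to the `bus.jpg` image next to the script and 10 timed invocations. The flags below are the ones its argument parser already defines; the image path is only an illustrative placeholder:

```bash
# point the demo at another image and average over more runs
python3 demo_qnn.py --imgs /path/to/your_image.jpg --invoke_nums 20
```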

### Run c++ demo

```bash
cd yolov5n/model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp
mkdir build
cd build
cmake ..
make
./run_yolov5
```
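
Note: `cpp/CMakeLists.txt` hard-codes the OpenCV and AidLite locations (`/usr/include/opencv4`, `/usr/local/include`, `/usr/local/lib`). If your headers or libraries live elsewhere, adjust `OPENCV_INCLUDE_DIR`, `AIDLITE_INCLUDE_DIR` and `AIDLITE_LINK_DIR` before running `cmake ..`.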
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/CMakeLists.txt CHANGED
@@ -1,36 +1,36 @@
cmake_minimum_required(VERSION 3.12)
project(aidlite_cpp_samples)

set(CMAKE_BUILD_TYPE Release)

set(OPENCV_INCLUDE_DIR /usr/include/opencv4)
set(OPENCV_LINK_DIR "")
set(OPENCV_LIBS opencv_imgcodecs opencv_imgproc opencv_core) # if linking static libraries, the order matters

set(AIDLITE_INCLUDE_DIR /usr/local/include)
set(AIDLITE_LINK_DIR /usr/local/lib)
set(AIDLITE_LIB aidlite)

function(func_generate_sample_exe sample_name)

    set(demo_name ${sample_name})

    file(GLOB src_files ${CMAKE_CURRENT_SOURCE_DIR}/${demo_name}.cpp)

    add_executable(${demo_name} ${src_files})

    target_compile_options(${demo_name} PRIVATE -std=c++11)
    target_include_directories(${demo_name} PUBLIC ${OPENCV_INCLUDE_DIR} ${AIDLITE_INCLUDE_DIR})
    target_link_directories(${demo_name} PUBLIC ${OPENCV_LINK_DIR} ${AIDLITE_LINK_DIR})
    target_link_libraries(${demo_name} PUBLIC ${AIDLITE_LIB} ${OPENCV_LIBS} pthread)
    message(STATUS "[CMAKEMSG] ${demo_name} need libraries is : ${AIDLITE_LIB} ${OPENCV_LIBS}")

endfunction()

set(SAMPLE_LIST run_yolov5)

FOREACH(sample ${SAMPLE_LIST})
    message("prepare to generate cpp sample : ${sample}")

    func_generate_sample_exe(${sample})
ENDFOREACH(sample)
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/cpp/run_yolov5.cpp CHANGED
@@ -1,455 +1,455 @@
1
+ #include <thread>
2
+ #include <future>
3
+ #include <opencv2/opencv.hpp>
4
+ #include "aidlux/aidlite/aidlite.hpp"
5
+
6
+ using namespace Aidlux::Aidlite;
7
+ using namespace std;
8
+
9
+ #define OBJ_CLASS_NUM 80
10
+ #define NMS_THRESH 0.45
11
+ #define BOX_THRESH 0.5
12
+ #define MODEL_SIZE 640
13
+ #define OBJ_NUMB_MAX_SIZE 64
14
+ #define PROP_BOX_SIZE (5+OBJ_CLASS_NUM)
15
+ #define STRIDE8_SIZE (MODEL_SIZE / 8)
16
+ #define STRIDE16_SIZE (MODEL_SIZE / 16)
17
+ #define STRIDE32_SIZE (MODEL_SIZE / 32)
18
+
19
+ const float anchor0[6] = {10, 13, 16, 30, 33, 23};
20
+ const float anchor1[6] = {30, 61, 62, 45, 59, 119};
21
+ const float anchor2[6] = {116, 90, 156, 198, 373, 326};
22
+
23
+ string class_names[] = {
24
+ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
25
+ "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
26
+ "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
27
+ "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
28
+ "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
29
+ "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet",
30
+ "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
31
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
32
+
33
+
34
+ static float sigmoid(float x) { return 1.f / (1.f + exp(-x)); }
35
+
36
+ float eqprocess(cv::Mat* src, cv::Mat* dst, int width, int height)
37
+ {
38
+ int w = src->cols;
39
+ int h = src->rows;
40
+ float scale_h = float(h) / float(height);
41
+ float scale_w = float(w) / float(width);
42
+
43
+ float scale;
44
+ if (scale_h > scale_w)
45
+ {
46
+ scale = scale_h;
47
+ }
48
+ else
49
+ {
50
+ scale = scale_w;
51
+ }
52
+
53
+ int rel_width = int(w / scale);
54
+ int rel_height = int(h / scale);
55
+
56
+ cv::Mat tmp = (*dst)(cv::Rect(0, 0, rel_width, rel_height));
57
+ cv::resize(*src, tmp, cv::Size(rel_width, rel_height));
58
+ return scale;
59
+ }
60
+
61
+ std::vector<std::string> split(const std::string& str)
62
+ {
63
+ std::stringstream ss(str);
64
+ std::vector<std::string> elems;
65
+ std::string item;
66
+ while (std::getline(ss, item, ','))
67
+ {
68
+ elems.push_back(item);
69
+ }
70
+ return elems;
71
+ }
72
+
73
+
74
+ int process(float* output, std::vector<float>& boxes, std::vector<float>& objProbs, std::vector<int>& classId, float * anchor, int grid_h, int grid_w, int stride, int imgsz)
75
+ {
76
+ int ct = 0;
77
+ int validCount = 0;
78
+ for (int a = 0; a < 3; a++)
79
+ {
80
+ for (int i = 0; i < grid_h; i++)
81
+ {
82
+ for (int j = 0; j < grid_w; j++)
83
+ {
84
+ int idx = a * PROP_BOX_SIZE + (i * grid_w + j) * 3 * PROP_BOX_SIZE;
85
+ float box_confidence = sigmoid(output[idx + 4]);
86
+ if (box_confidence >= BOX_THRESH )
87
+ {
88
+ float box_x = sigmoid(output[idx]) * 2 - 0.5;
89
+ float box_y = sigmoid(output[idx + 1]) * 2 - 0.5;
90
+ float box_w = pow(sigmoid(output[idx + 2]) * 2, 2);
91
+ float box_h = pow(sigmoid(output[idx + 3]) * 2, 2);
92
+
93
+ box_x = (box_x + j) * (float)stride;
94
+ box_y = (box_y + i) * (float)stride;
95
+ box_w = box_w * anchor[a * 2];
96
+ box_h = box_h * anchor[a * 2 + 1];
97
+
98
+ box_x -= (box_w / 2.0);
99
+ box_y -= (box_h / 2.0);
100
+
101
+ float maxClassProbs = 0;
102
+ int maxClassId = 0;
103
+
104
+ for(int k = 0; k < OBJ_CLASS_NUM ; k++)
105
+ {
106
+ float prob = output[idx + 5 + k];
107
+ if (prob > maxClassProbs)
108
+ {
109
+ maxClassId = k;
110
+ maxClassProbs = prob;
111
+ }
112
+ }
113
+ if (maxClassProbs > BOX_THRESH)
114
+ {
115
+ objProbs.push_back(sigmoid(maxClassProbs) * box_confidence);
116
+ classId.push_back(maxClassId);
117
+ validCount++;
118
+ boxes.push_back(box_x);
119
+ boxes.push_back(box_y);
120
+ boxes.push_back(box_w);
121
+ boxes.push_back(box_h);
122
+ }
123
+ }
124
+ }
125
+ }
126
+ }
127
+
128
+ return validCount;
129
+ }
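+ // process() walks one detection head laid out as NHWC (grid_h x grid_w cells, each holding
+ // 3 anchors x 85 values, as the idx arithmetic assumes). Cells whose sigmoid objectness reaches
+ // BOX_THRESH are decoded with the YOLOv5 anchor formula into top-left xywh boxes, and the best
+ // class score is combined with the objectness to form the final confidence.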
130
+
131
+
132
+ static int quick_sort_indice_inverse(std::vector<float>& input, int left, int right, std::vector<int>& indices)
133
+ {
134
+ float key;
135
+ int key_index;
136
+ int low = left;
137
+ int high = right;
138
+ if (left < right) {
139
+ key_index = indices[left];
140
+ key = input[left];
141
+ while (low < high) {
142
+ while (low < high && input[high] <= key) {
143
+ high--;
144
+ }
145
+ input[low] = input[high];
146
+ indices[low] = indices[high];
147
+ while (low < high && input[low] >= key) {
148
+ low++;
149
+ }
150
+ input[high] = input[low];
151
+ indices[high] = indices[low];
152
+ }
153
+ input[low] = key;
154
+ indices[low] = key_index;
155
+ quick_sort_indice_inverse(input, left, low - 1, indices);
156
+ quick_sort_indice_inverse(input, low + 1, right, indices);
157
+ }
158
+ return low;
159
+ }
160
+
161
+ static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
162
+ float ymax1)
163
+ {
164
+ float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
165
+ float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
166
+ float i = w * h;
167
+ float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
168
+ return u <= 0.f ? 0.f : (i / u);
169
+ }
170
+
171
+
172
+ static int nms(int validCount, std::vector<float>& outputLocations, std::vector<int> classIds, std::vector<int>& order,
173
+ int filterId, float threshold)
174
+ {
175
+ for (int i = 0; i < validCount; ++i) {
176
+ if (order[i] == -1 || classIds[i] != filterId) {
177
+ continue;
178
+ }
179
+ int n = order[i];
180
+ for (int j = i + 1; j < validCount; ++j) {
181
+ int m = order[j];
182
+ if (m == -1 || classIds[i] != filterId) {
183
+ continue;
184
+ }
185
+ float xmin0 = outputLocations[n * 4 + 0];
186
+ float ymin0 = outputLocations[n * 4 + 1];
187
+ float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
188
+ float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
189
+
190
+ float xmin1 = outputLocations[m * 4 + 0];
191
+ float ymin1 = outputLocations[m * 4 + 1];
192
+ float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
193
+ float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
194
+
195
+ float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
196
+
197
+ if (iou > threshold) {
198
+ order[j] = -1;
199
+ }
200
+ }
201
+ }
202
+ return 0;
203
+ }
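+ // nms() is called once per class id: walking the score-sorted order, a kept box marks every
+ // later box whose IoU with it exceeds the threshold as suppressed by writing -1 into `order`.
+ // The drawing loop below skips those -1 entries.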
204
+
205
+ int32_t thread_func(int thread_idx){
206
+
207
+ printf("entry thread_func[%d]\n", thread_idx);
208
+
209
+ std::string image_path = "../bus.jpg";
210
+ std::string save_name = "out_yolov5_qnn";
211
+ std::string model_path = "../../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin";
212
+
213
+ // image process
214
+ cv::Mat frame = cv::imread(image_path);
215
+ cv::cvtColor(frame, frame , cv::COLOR_BGR2RGB);
216
+ cv::Scalar stds_scale(255, 255, 255);
217
+ cv::Size target_shape(MODEL_SIZE, MODEL_SIZE);
218
+
219
+ cv::Mat frame_resized = cv::Mat::zeros(MODEL_SIZE, MODEL_SIZE, CV_8UC3);
220
+ float scale = eqprocess(&frame, &frame_resized, MODEL_SIZE, MODEL_SIZE);
221
+
222
+ cv::Mat input_data;
223
+ frame_resized.convertTo(input_data, CV_32FC3);
224
+ cv::divide(input_data, stds_scale, input_data);
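+ // The C++ preprocessing mirrors the Python demo: BGR->RGB, letterbox into a 640x640 canvas via
+ // eqprocess(), conversion to CV_32FC3 and division by 255 so pixel values land in [0, 1].
+ // `scale` is kept to map the detected boxes back to the original frame at the end.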
225
+
226
+ // model init
227
+ printf("Aidlite library version : %s\n", Aidlux::Aidlite::get_library_version().c_str());
228
+
229
+ // Combine the following three logging calls as needed. If none of them is called, only error logs go to stderr by default.
230
+ Aidlux::Aidlite::set_log_level(Aidlux::Aidlite::LogLevel::INFO);
231
+ Aidlux::Aidlite::log_to_stderr();
232
+ // Aidlux::Aidlite::log_to_file("./qnn_yolov5_multi_");
233
+
234
+ Model* model = Model::create_instance(model_path);
235
+ if(model == nullptr){
236
+ printf("Create Model object failed !\n");
237
+ return EXIT_FAILURE;
238
+ }
239
+ std::vector<std::vector<uint32_t>> input_shapes = {{1,640,640,3}};
240
+ std::vector<std::vector<uint32_t>> output_shapes = {{1,40,40,255}, {1,20,20,255}, {1,80,80,255}};
241
+ model->set_model_properties(input_shapes, DataType::TYPE_FLOAT32, output_shapes, DataType::TYPE_FLOAT32);
242
+
243
+ Config* config = Config::create_instance();
244
+ if(config == nullptr){
245
+ printf("Create Config object failed !\n");
246
+ return EXIT_FAILURE;
247
+ }
248
+
249
+ config->implement_type = ImplementType::TYPE_LOCAL;
250
+ config->framework_type = FrameworkType::TYPE_QNN216;
251
+ config->accelerate_type = AccelerateType::TYPE_DSP;
252
+
253
+ std::unique_ptr<Interpreter>&& fast_interpreter = InterpreterBuilder::build_interpretper_from_model_and_config(model, config);
254
+ if(fast_interpreter == nullptr){
255
+ printf("build_interpretper_from_model_and_config failed !\n");
256
+ return EXIT_FAILURE;
257
+ }
258
+
259
+ int result = fast_interpreter->init();
260
+ if(result != EXIT_SUCCESS){
261
+ printf("interpreter->init() failed !\n");
262
+ return EXIT_FAILURE;
263
+ }
264
+
265
+ result = fast_interpreter->load_model();
266
+ if(result != EXIT_SUCCESS){
267
+ printf("interpreter->load_model() failed !\n");
268
+ return EXIT_FAILURE;
269
+ }
270
+
271
+ printf("load model load success!\n");
272
+
273
+ float* stride8 = nullptr;
274
+ float* stride16 = nullptr;
275
+ float* stride32 = nullptr;
276
+
277
+ // post_process
278
+ std::vector<float> filterBoxes;
279
+ std::vector<float> objProbs;
280
+ std::vector<int> classId;
281
+
282
+ double sum_time_0 = 0.0, sum_time_1 = 0.0, sum_time_2 = 0.0;
283
+ int _counter = 10;
284
+ for(int idx = 0; idx < _counter; ++idx){
285
+ std::chrono::steady_clock::time_point st0 = std::chrono::steady_clock::now();
286
+
287
+ void* input_tensor_data = (void*)input_data.data;
288
+ result = fast_interpreter->set_input_tensor(0,input_tensor_data);
289
+ if(result != EXIT_SUCCESS){
290
+ printf("interpreter->set_input_tensor() failed !\n");
291
+ return EXIT_FAILURE;
292
+ }
293
+
294
+ std::chrono::steady_clock::time_point et0 = std::chrono::steady_clock::now();
295
+ std::chrono::steady_clock::duration dur0 = et0 - st0;
296
+ printf("current thread_idx[%d] [%d] set_input_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur0).count()*1000);
297
+ sum_time_0 += std::chrono::duration<double>(dur0).count()*1000;
298
+
299
+ std::chrono::steady_clock::time_point st1 = std::chrono::steady_clock::now();
300
+
301
+ result = fast_interpreter->invoke();
302
+ if(result != EXIT_SUCCESS){
303
+ printf("interpreter->invoke() failed !\n");
304
+ return EXIT_FAILURE;
305
+ }
306
+
307
+ std::chrono::steady_clock::time_point et1 = std::chrono::steady_clock::now();
308
+ std::chrono::steady_clock::duration dur1 = et1 - st1;
309
+ printf("current thread_idx[%d] [%d] invoke cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur1).count()*1000);
310
+ sum_time_1 += std::chrono::duration<double>(dur1).count()*1000;
311
+
312
+ std::chrono::steady_clock::time_point st2 = std::chrono::steady_clock::now();
313
+
314
+ uint32_t output_tensor_length_0 = 0;
315
+ result = fast_interpreter->get_output_tensor(0, (void**)&stride8, &output_tensor_length_0);
316
+ if(result != EXIT_SUCCESS){
317
+ printf("interpreter->get_output_tensor() 0 failed !\n");
318
+ return EXIT_FAILURE;
319
+ }
320
+ printf("sample : interpreter->get_output_tensor() 0 length is [%d] !\n", output_tensor_length_0);
321
+
322
+ uint32_t output_tensor_length_1 = 0;
323
+ result = fast_interpreter->get_output_tensor(1, (void**)&stride16, &output_tensor_length_1);
324
+ if(result != EXIT_SUCCESS){
325
+ printf("interpreter->get_output_tensor() 1 failed !\n");
326
+ return EXIT_FAILURE;
327
+ }
328
+ printf("sample : interpreter->get_output_tensor() 1 length is [%d] !\n", output_tensor_length_1);
329
+
330
+ uint32_t output_tensor_length_2 = 0;
331
+ result = fast_interpreter->get_output_tensor(2, (void**)&stride32, &output_tensor_length_2);
332
+ if(result != EXIT_SUCCESS){
333
+ printf("interpreter->get_output_tensor() 2 failed !\n");
334
+ return EXIT_FAILURE;
335
+ }
336
+ printf("sample : interpreter->get_output_tensor() 2 length is [%d] !\n", output_tensor_length_2);
337
+
338
+ std::chrono::steady_clock::time_point et2 = std::chrono::steady_clock::now();
339
+ std::chrono::steady_clock::duration dur2 = et2 - st2;
340
+ printf("current thread_idx[%d] [%d] get_output_tensor cost time : %f\n", thread_idx, idx, std::chrono::duration<double>(dur2).count()*1000);
341
+ sum_time_2 += std::chrono::duration<double>(dur2).count()*1000;
342
+ }
343
+ printf("repeat [%d] time , input[%f] --- invoke[%f] --- output[%f] --- sum[%f]ms\n", _counter, sum_time_0, sum_time_1, sum_time_2, sum_time_0+sum_time_1+sum_time_2);
344
+
345
+ std::chrono::steady_clock::time_point pps = std::chrono::steady_clock::now();
346
+
347
+ filterBoxes.clear();
348
+ objProbs.clear();
349
+ classId.clear();
350
+ int validCount0 = process(stride8, filterBoxes, objProbs, classId, (float*)anchor0, STRIDE8_SIZE, STRIDE8_SIZE, 8, MODEL_SIZE);
351
+ int validCount1 = process(stride16, filterBoxes, objProbs, classId, (float*)anchor1, STRIDE16_SIZE, STRIDE16_SIZE, 16, MODEL_SIZE);
352
+ int validCount2 = process(stride32, filterBoxes, objProbs, classId, (float*)anchor2, STRIDE32_SIZE, STRIDE32_SIZE, 32, MODEL_SIZE);
353
+
354
+ int validCount = validCount0 + validCount1 +validCount2;
355
+
356
+ std::vector<int> indexArray;
357
+ for (int i = 0; i < validCount; ++i){
358
+ indexArray.push_back(i);
359
+ }
360
+
361
+ quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
362
+
363
+ std::set<int> class_set(std::begin(classId), std::end(classId));
364
+
365
+ for (auto c : class_set) {
366
+ nms(validCount, filterBoxes, classId, indexArray, c, NMS_THRESH);
367
+ }
368
+
369
+ std::chrono::steady_clock::time_point ppe = std::chrono::steady_clock::now();
370
+ std::chrono::steady_clock::duration durpp = ppe - pps;
371
+ printf("postprocess cost time : %f ms\n", std::chrono::duration<double>(durpp).count()*1000);
372
+
373
+ // Reference boxes come from an SNPE2 FP32 CPU run; [x1, y1, x2, y2] coordinates are rounded down
374
+ const float expected_box_0[3][4] = {{210, 241, 285, 519}, {473, 229, 560, 522}, {108, 231, 231, 542}};
375
+ const float expected_box_5[1][4] = {{91, 131, 551, 464}};
376
+
377
+ unsigned int box_count = 0;
378
+ unsigned int verify_pass_count = 0;
379
+ for (int i = 0; i < validCount; ++i) {
380
+
381
+ if (indexArray[i] == -1) {
382
+ continue;
383
+ }
384
+ int n = indexArray[i];
385
+
386
+ float x1 = filterBoxes[n * 4 + 0] * scale;
387
+ float y1 = filterBoxes[n * 4 + 1] * scale;
388
+ float x2 = x1 + filterBoxes[n * 4 + 2] * scale;
389
+ float y2 = y1 + filterBoxes[n * 4 + 3] * scale;
390
+ int id = classId[n];
391
+ float obj_conf = objProbs[i];
392
+
393
+ // string show_info = "class " + to_string(id) + ": " + to_string(obj_conf);
394
+ string show_info = class_names[id] + ": " + to_string(obj_conf);
395
+ cv::putText(frame, show_info.c_str(), cv::Point(x1, y1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2, 2); // color-BGR
396
+ cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2, 2, 0);
397
+
398
+ // verify that the result is correct
399
+ printf("Result id[%d]-x1[%f]-y1[%f]-x2[%f]-y2[%f]\n", id, x1, y1, x2, y2);
400
+
401
+ ++box_count;
402
+ if(id == 0){
403
+ for(int idx = 0; idx < 3; ++idx){
404
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
405
+ expected_box_0[idx][0], expected_box_0[idx][1], expected_box_0[idx][2], expected_box_0[idx][3]);
406
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
407
+ if(coverage_ratio > 0.9){
408
+ ++verify_pass_count;
409
+ break;
410
+ }
411
+ }
412
+ }else if(id == 5){
413
+ for(int idx = 0; idx < 1; ++idx){
414
+ float coverage_ratio = CalculateOverlap(x1, y1, x2, y2,
415
+ expected_box_5[idx][0], expected_box_5[idx][1], expected_box_5[idx][2], expected_box_5[idx][3]);
416
+ printf("Verify result : idx[%d] id[%d] coverage_ratio[%f]\n", idx, id, coverage_ratio);
417
+ if(coverage_ratio > 0.9){
418
+ ++verify_pass_count;
419
+ break;
420
+ }
421
+ }
422
+ }else{
423
+ printf("ERROR : The Yolov5s model inference result is not the expected classification category.\n");
424
+ return EXIT_FAILURE;
425
+ }
426
+ }
427
+
428
+ // save the result image
429
+ cv::cvtColor(frame, frame , cv::COLOR_RGB2BGR);
430
+ cv::imwrite("result.jpg", frame);
431
+
432
+ result = fast_interpreter->destory();
433
+ if(result != EXIT_SUCCESS){
434
+ printf("interpreter->destory() failed !\n");
435
+ return EXIT_FAILURE;
436
+ }
437
+
438
+ printf("exit thread_func[%d]\n", thread_idx);
439
+
440
+ return EXIT_SUCCESS;
441
+ }
442
+
443
+ int main(int argc, char** args)
444
+ {
445
+
446
+ std::future<int> thread_01_result = std::async(std::launch::async, thread_func, 1);
447
+
448
+ if(EXIT_SUCCESS != thread_01_result.get()){
449
+ printf("ERROR : thread_01 run failed.\n");
450
+ return EXIT_FAILURE;
451
+ }
452
+
453
+ printf("Exit main function .\n");
454
+ return 0;
455
  }
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/models/cutoff_yolov5n_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
version https://git-lfs.github.com/spec/v1
oid sha256:3aee9e73507ebbffbd59b426b06531d05f4bc8d0ea934880a32804b6cfda720a
size 2124248
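Note: the context binary above is stored as a Git LFS pointer (about 2.1 MB once fetched); after cloning, run `git lfs pull` if the file has not been downloaded automatically.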
model_farm_yolov5n_qcs8550_qnn2.16_int8_aidlite/python/demo_qnn.py CHANGED
@@ -1,338 +1,338 @@
1
- import time
2
- import numpy as np
3
- import cv2
4
- import aidlite
5
- import argparse
6
- import os
7
- OBJ_CLASS_NUM = 80
8
- NMS_THRESH = 0.45
9
- BOX_THRESH = 0.5
10
- MODEL_SIZE = 640
11
-
12
- OBJ_NUMB_MAX_SIZE = 64
13
- PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
- STRIDE8_SIZE = (MODEL_SIZE / 8)
15
- STRIDE16_SIZE = (MODEL_SIZE / 16)
16
- STRIDE32_SIZE = (MODEL_SIZE / 32)
17
-
18
- anchors = [[10, 13, 16, 30, 33, 23],
19
- [30, 61, 62, 45, 59, 119],
20
- [116, 90, 156, 198, 373, 326]]
21
-
22
- current_p =os.path.dirname(os.path.abspath(__file__))
23
-
24
- coco_class = [
25
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
- 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
- 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
- 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
- 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
- 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
- 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
- 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
-
34
-
35
- def eqprocess(image, size1, size2):
36
- h, w, _ = image.shape
37
- mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
- scale1 = h / size1
39
- scale2 = w / size2
40
- if scale1 > scale2:
41
- scale = scale1
42
- else:
43
- scale = scale2
44
- img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
- mask[:int(h / scale), :int(w / scale), :] = img
46
- return mask, scale
47
-
48
-
49
- def xywh2xyxy(x):
50
- '''
51
- Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
- '''
53
- y = np.copy(x)
54
- y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
- y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
- y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
- y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
- return y
59
-
60
-
61
- def xyxy2xywh(box):
62
- '''
63
- Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
- '''
65
- box[:, 2:] = box[:, 2:] - box[:, :2]
66
- return box
67
-
68
-
69
- def NMS(dets, scores, thresh):
70
- '''
71
- 单类NMS算法
72
- dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
- '''
74
- x1 = dets[:, 0]
75
- y1 = dets[:, 1]
76
- x2 = dets[:, 2]
77
- y2 = dets[:, 3]
78
- areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
- keep = []
80
- index = scores.argsort()[::-1]
81
- while index.size > 0:
82
- i = index[0] # every time the first is the biggst, and add it directly
83
- keep.append(i)
84
- x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
- y11 = np.maximum(y1[i], y1[index[1:]])
86
- x22 = np.minimum(x2[i], x2[index[1:]])
87
- y22 = np.minimum(y2[i], y2[index[1:]])
88
- w = np.maximum(0, x22 - x11 + 1) # the weights of overlap
89
- h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
- overlaps = w * h
91
- ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
- idx = np.where(ious <= thresh)[0]
93
- index = index[idx + 1] # because index start from 1
94
-
95
- return keep
96
-
97
-
98
- def clip_coords(boxes, img_shape):
99
- # Clip bounding xyxy bounding boxes to image shape (height, width)
100
- boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
- boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
- boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
- boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
-
105
-
106
- def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
- '''
108
- 检测输出后处理
109
- prediction: aidlite模型预测输出
110
- img0shape: 原始图片shape
111
- img1shape: 输入图片shape
112
- conf_thres: 置信度阈值
113
- iou_thres: IOU阈值
114
- return: list[np.ndarray(N, 5)], 对应类别的坐标框信息, xywh、conf
115
- '''
116
- h, w, _ = img1shape
117
- valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
- valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
- valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
-
121
- max_det = 300
122
- max_wh = 7680
123
- max_nms = 30000
124
- valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
- valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
- sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
- valid_condidates = valid_condidates[sort_id[:max_nms]]
128
- boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
- index = NMS(boxes, scores, iou_thres)[:max_det]
130
- out_boxes = valid_condidates[index]
131
- clip_coords(out_boxes[:, :4], img0shape)
132
- out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
- print("检测到{}个区域".format(len(out_boxes)))
134
- return out_boxes
135
-
136
-
137
- def draw_detect_res(img, det_pred):
138
- '''
139
- 检测结果绘制
140
- '''
141
- img = img.astype(np.uint8)
142
- color_step = int(255 / len(coco_class))
143
- for i in range(len(det_pred)):
144
- x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
- score = det_pred[i][4]
146
- cls_id = int(det_pred[i][5])
147
-
148
- print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
-
150
- cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
- cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
- thickness=2)
153
-
154
- return img
155
-
156
-
157
- class Detect():
158
- # YOLOv5 Detect head for detection models
159
- def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
- super().__init__()
161
- self.nc = nc # number of classes
162
- self.no = nc + 5 # number of outputs per anchor
163
- self.stride = stride
164
- self.nl = len(anchors) # number of detection layers
165
- self.na = len(anchors[0]) // 2 # number of anchors
166
- self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
- self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
-
169
- base_scale = image_size // 8
170
- for i in range(self.nl):
171
- self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
-
173
- def _make_grid(self, nx=20, ny=20, i=0):
174
- y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
- yv, xv = np.meshgrid(y, x)
176
- yv, xv = yv.T, xv.T
177
- # add grid offset, i.e. y = 2.0 * x - 0.5
178
- grid = np.stack((xv, yv), 2)
179
- grid = grid[np.newaxis, np.newaxis, ...]
180
- grid = np.repeat(grid, self.na, axis=1) - 0.5
181
- anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
- anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
- anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
- return grid, anchor_grid
185
-
186
- def sigmoid(self, arr):
187
- return 1 / (1 + np.exp(-arr))
188
-
189
- def __call__(self, x):
190
- z = [] # inference output
191
- for i in range(self.nl):
192
- bs, _, ny, nx = x[i].shape
193
- x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
- y = self.sigmoid(x[i])
195
- y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
- y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
- z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
-
199
- return np.concatenate(z, 1)
200
-
201
- def main():
202
- args = parser_args()
203
- target_model = args.target_model
204
- model_type = args.model_type
205
- size = int(args.size)
206
- imgs = args.imgs
207
- invoke_nums = int(args.invoke_nums)
208
- print("Start main ... ...")
209
- # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
- # aidlite.log_to_stderr()
211
- # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
- # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
-
214
- config = aidlite.Config.create_instance()
215
- if config is None:
216
- print("Create config failed !")
217
- return False
218
-
219
-
220
- config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
- if model_type.lower()=="qnn":
222
- config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
- elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
- config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
-
226
- config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
- config.is_quantify_model = 1
228
-
229
-
230
- model = aidlite.Model.create_instance(target_model)
231
- if model is None:
232
- print("Create model failed !")
233
- return False
234
- input_shapes = [[1, size, size, 3]]
235
- output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
- model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
- output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
-
239
- interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
- if interpreter is None:
241
- print("build_interpretper_from_model_and_config failed !")
242
- return None
243
- result = interpreter.init()
244
- if result != 0:
245
- print(f"interpreter init failed !")
246
- return False
247
- result = interpreter.load_model()
248
- if result != 0:
249
- print("interpreter load model failed !")
250
- return False
251
- print("detect model load success!")
252
-
253
- # image process
254
- frame = cv2.imread(imgs)
255
- # Pad the image to a square, then resize (aspect ratio preserved)
256
- img_processed = np.copy(frame)
257
- [height, width, _] = img_processed.shape
258
- length = max((height, width))
259
- scale = length / size
260
- ratio=[scale,scale]
261
- image = np.zeros((length, length, 3), np.uint8)
262
- image[0:height, 0:width] = img_processed
263
- img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
- img_input=cv2.resize(img_input,(size,size))
265
-
266
- mean_data=[0, 0, 0]
267
- std_data=[255, 255, 255]
268
- img_input = (img_input-mean_data)/std_data # HWC
269
-
270
- img_input = img_input.astype(np.float32)
271
-
272
-
273
- # qnn run
274
- invoke_time=[]
275
- for i in range(invoke_nums):
276
- result = interpreter.set_input_tensor(0, img_input.data)
277
- if result != 0:
278
- print("interpreter set_input_tensor() failed")
279
-
280
- t1=time.time()
281
- result = interpreter.invoke()
282
- cost_time = (time.time()-t1)*1000
283
- invoke_time.append(cost_time)
284
-
285
- if result != 0:
286
- print("interpreter set_input_tensor() failed")
287
- stride8 = interpreter.get_output_tensor(0)
288
- stride16 = interpreter.get_output_tensor(1)
289
- stride32 = interpreter.get_output_tensor(2)
290
-
291
-
292
- result = interpreter.destory()
293
-
294
- ## timing statistics
295
- max_invoke_time = max(invoke_time)
296
- min_invoke_time = min(invoke_time)
297
- mean_invoke_time = sum(invoke_time)/invoke_nums
298
- var_invoketime=np.var(invoke_time)
299
- print("=======================================")
300
- print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
- print("=======================================")
302
-
303
- ## post-processing
304
- stride = [8, 16, 32]
305
- yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
- validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
- validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
- validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
- pred = yolo_head([validCount0, validCount1, validCount2])
310
- det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
- det_pred[np.isnan(det_pred)] = 0.0
312
- det_pred[:, :4] = det_pred[:, :4] * scale
313
- res_img = draw_detect_res(frame, det_pred)
314
-
315
- save_path=os.path.join(current_p,"result.jpg")
316
- cv2.imwrite(save_path, res_img)
317
- print("图片保存在",save_path)
318
- print("=======================================")
319
-
320
- return True
321
-
322
-
323
-
324
-
325
- image_path = os.path.join(current_p,"bus.jpg")
326
- def parser_args():
327
- parser = argparse.ArgumentParser(description="Run model benchmarks")
328
- parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin.aidem'),help="inference model path")
329
- parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
- parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
- parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
- parser.add_argument('--size',type=str,default=640,help="model input size")
333
- args = parser.parse_args()
334
- return args
335
-
336
- if __name__ == "__main__":
337
- main()
338
-
 
1
+ import time
2
+ import numpy as np
3
+ import cv2
4
+ import aidlite
5
+ import argparse
6
+ import os
7
+ OBJ_CLASS_NUM = 80
8
+ NMS_THRESH = 0.45
9
+ BOX_THRESH = 0.5
10
+ MODEL_SIZE = 640
11
+
12
+ OBJ_NUMB_MAX_SIZE = 64
13
+ PROP_BOX_SIZE = (5 + OBJ_CLASS_NUM)
14
+ STRIDE8_SIZE = (MODEL_SIZE / 8)
15
+ STRIDE16_SIZE = (MODEL_SIZE / 16)
16
+ STRIDE32_SIZE = (MODEL_SIZE / 32)
17
+
18
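+ # (w, h) anchor pairs for strides 8 / 16 / 32 (the YOLOv5 default COCO anchors)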
+ anchors = [[10, 13, 16, 30, 33, 23],
19
+ [30, 61, 62, 45, 59, 119],
20
+ [116, 90, 156, 198, 373, 326]]
21
+
22
+ current_p =os.path.dirname(os.path.abspath(__file__))
23
+
24
+ coco_class = [
25
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
26
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
27
+ 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
28
+ 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
29
+ 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
30
+ 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
31
+ 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
32
+ 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
33
+
34
+
35
+ def eqprocess(image, size1, size2):
36
+ h, w, _ = image.shape
37
+ mask = np.zeros((size1, size2, 3), dtype=np.float32)
38
+ scale1 = h / size1
39
+ scale2 = w / size2
40
+ if scale1 > scale2:
41
+ scale = scale1
42
+ else:
43
+ scale = scale2
44
+ img = cv2.resize(image, (int(w / scale), int(h / scale)))
45
+ mask[:int(h / scale), :int(w / scale), :] = img
46
+ return mask, scale
47
+
48
+
49
+ def xywh2xyxy(x):
50
+ '''
51
+ Box (center x, center y, width, height) to (x1, y1, x2, y2)
52
+ '''
53
+ y = np.copy(x)
54
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
55
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
56
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
57
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
58
+ return y
59
+
60
+
61
+ def xyxy2xywh(box):
62
+ '''
63
+ Box (left_top x, left_top y, right_bottom x, right_bottom y) to (left_top x, left_top y, width, height)
64
+ '''
65
+ box[:, 2:] = box[:, 2:] - box[:, :2]
66
+ return box
67
+
68
+
69
+ def NMS(dets, scores, thresh):
70
+ '''
71
+ Single-class NMS (non-maximum suppression)
72
+ dets.shape = (N, 5), (left_top x, left_top y, right_bottom x, right_bottom y, Scores)
73
+ '''
74
+ x1 = dets[:, 0]
75
+ y1 = dets[:, 1]
76
+ x2 = dets[:, 2]
77
+ y2 = dets[:, 3]
78
+ areas = (y2 - y1 + 1) * (x2 - x1 + 1)
79
+ keep = []
80
+ index = scores.argsort()[::-1]
81
+ while index.size > 0:
82
+ i = index[0] # the first index always has the highest score; keep it directly
83
+ keep.append(i)
84
+ x11 = np.maximum(x1[i], x1[index[1:]]) # calculate the points of overlap
85
+ y11 = np.maximum(y1[i], y1[index[1:]])
86
+ x22 = np.minimum(x2[i], x2[index[1:]])
87
+ y22 = np.minimum(y2[i], y2[index[1:]])
88
+ w = np.maximum(0, x22 - x11 + 1) # the width of overlap
89
+ h = np.maximum(0, y22 - y11 + 1) # the height of overlap
90
+ overlaps = w * h
91
+ ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
92
+ idx = np.where(ious <= thresh)[0]
93
+ index = index[idx + 1] # offset by 1 because ious was computed against index[1:]
94
+
95
+ return keep
96
+
97
+
98
+ def clip_coords(boxes, img_shape):
99
+ # Clip xyxy bounding boxes to image shape (height, width)
100
+ boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0]) # x1
101
+ boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1]) # y1
102
+ boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2]) # x2
103
+ boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3]) # y2
104
+
105
+
106
+ def detect_postprocess(prediction, img0shape, img1shape, conf_thres=0.25, iou_thres=0.45):
107
+ '''
108
+ Post-process the detection output
109
+ prediction: raw aidlite model output
110
+ img0shape: shape of the original image
111
+ img1shape: shape of the model input image
112
+ conf_thres: confidence threshold
113
+ iou_thres: IoU threshold
114
+ return: list[np.ndarray(N, 5)], per-class box info: xywh, conf
115
+ '''
116
+ h, w, _ = img1shape
117
+ valid_condidates = prediction[prediction[..., 4] > conf_thres]
118
+ valid_condidates[:, 5:] *= valid_condidates[:, 4:5]
119
+ valid_condidates[:, :4] = xywh2xyxy(valid_condidates[:, :4])
120
+
121
+ max_det = 300
122
+ max_wh = 7680
123
+ max_nms = 30000
124
+ valid_condidates[:, 4] = valid_condidates[:, 5:].max(1)
125
+ valid_condidates[:, 5] = valid_condidates[:, 5:].argmax(1)
126
+ sort_id = np.argsort(valid_condidates[:, 4])[::-1]
127
+ valid_condidates = valid_condidates[sort_id[:max_nms]]
128
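+ # class-aware NMS trick: offset each box by class_id * max_wh so boxes of different classes never overlap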
+ boxes, scores = valid_condidates[:, :4] + valid_condidates[:, 5:6] * max_wh, valid_condidates[:, 4]
129
+ index = NMS(boxes, scores, iou_thres)[:max_det]
130
+ out_boxes = valid_condidates[index]
131
+ clip_coords(out_boxes[:, :4], img0shape)
132
+ out_boxes[:, :4] = xyxy2xywh(out_boxes[:, :4])
133
+ print("检测到{}个区域".format(len(out_boxes)))
134
+ return out_boxes
135
+
136
+
137
+ def draw_detect_res(img, det_pred):
138
+ '''
139
+ Draw detection results on the image
140
+ '''
141
+ img = img.astype(np.uint8)
142
+ color_step = int(255 / len(coco_class))
143
+ for i in range(len(det_pred)):
144
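+ # det_pred boxes are xywh here, so x2/y2 actually hold width/height (hence the x1 + x2, y1 + y2 corners below)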
+ x1, y1, x2, y2 = [int(t) for t in det_pred[i][:4]]
145
+ score = det_pred[i][4]
146
+ cls_id = int(det_pred[i][5])
147
+
148
+ print(i + 1, [x1, y1, x2, y2], score, coco_class[cls_id])
149
+
150
+ cv2.putText(img, f'{coco_class[cls_id]}', (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
151
+ cv2.rectangle(img, (x1, y1), (x2 + x1, y2 + y1), (0, int(cls_id * color_step), int(255 - cls_id * color_step)),
152
+ thickness=2)
153
+
154
+ return img
155
+
156
+
157
+ class Detect():
158
+ # YOLOv5 Detect head for detection models
159
+ def __init__(self, nc=80, anchors=(), stride=[], image_size=640): # detection layer
160
+ super().__init__()
161
+ self.nc = nc # number of classes
162
+ self.no = nc + 5 # number of outputs per anchor
163
+ self.stride = stride
164
+ self.nl = len(anchors) # number of detection layers
165
+ self.na = len(anchors[0]) // 2 # number of anchors
166
+ self.grid, self.anchor_grid = [0] * self.nl, [0] * self.nl
167
+ self.anchors = np.array(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
168
+
169
+ base_scale = image_size // 8
170
+ for i in range(self.nl):
171
+ self.grid[i], self.anchor_grid[i] = self._make_grid(base_scale // (2 ** i), base_scale // (2 ** i), i)
172
+
173
+ def _make_grid(self, nx=20, ny=20, i=0):
174
+ y, x = np.arange(ny, dtype=np.float32), np.arange(nx, dtype=np.float32)
175
+ yv, xv = np.meshgrid(y, x)
176
+ yv, xv = yv.T, xv.T
177
+ # add grid offset, i.e. y = 2.0 * x - 0.5
178
+ grid = np.stack((xv, yv), 2)
179
+ grid = grid[np.newaxis, np.newaxis, ...]
180
+ grid = np.repeat(grid, self.na, axis=1) - 0.5
181
+ anchor_grid = self.anchors[i].reshape((1, self.na, 1, 1, 2))
182
+ anchor_grid = np.repeat(anchor_grid, repeats=ny, axis=2)
183
+ anchor_grid = np.repeat(anchor_grid, repeats=nx, axis=3)
184
+ return grid, anchor_grid
185
+
186
+ def sigmoid(self, arr):
187
+ return 1 / (1 + np.exp(-arr))
188
+
189
+ def __call__(self, x):
190
+ z = [] # inference output
191
+ for i in range(self.nl):
192
+ bs, _, ny, nx = x[i].shape
193
+ x[i] = x[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
194
+ y = self.sigmoid(x[i])
195
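+ # YOLOv5 decode: xy = (2*sigmoid(t) - 0.5 + cell) * stride (the -0.5 is already folded into self.grid), wh = (2*sigmoid(t))**2 * anchor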
+ y[..., 0:2] = (y[..., 0:2] * 2. + self.grid[i]) * self.stride[i] # xy
196
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
197
+ z.append(y.reshape(bs, self.na * nx * ny, self.no))
198
+
199
+ return np.concatenate(z, 1)
200
+
201
+ def main():
202
+ args = parser_args()
203
+ target_model = args.target_model
204
+ model_type = args.model_type
205
+ size = int(args.size)
206
+ imgs = args.imgs
207
+ invoke_nums = int(args.invoke_nums)
208
+ print("Start main ... ...")
209
+ # aidlite.set_log_level(aidlite.LogLevel.INFO)
210
+ # aidlite.log_to_stderr()
211
+ # print(f"Aidlite library version : {aidlite.get_library_version()}")
212
+ # print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
213
+
214
+ config = aidlite.Config.create_instance()
215
+ if config is None:
216
+ print("Create config failed !")
217
+ return False
218
+
219
+
220
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
221
+ if model_type.lower()=="qnn":
222
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
223
+ elif model_type.lower()=="snpe2" or model_type.lower()=="snpe":
224
+ config.framework_type = aidlite.FrameworkType.TYPE_SNPE2
225
+
226
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
227
+ config.is_quantify_model = 1
228
+
229
+
230
+ model = aidlite.Model.create_instance(target_model)
231
+ if model is None:
232
+ print("Create model failed !")
233
+ return False
234
+ input_shapes = [[1, size, size, 3]]
235
+ output_shapes = [[1, 20, 20, 255], [1, 40, 40, 255], [1, 80, 80, 255]]
236
+ model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
237
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
238
+
239
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
240
+ if interpreter is None:
241
+ print("build_interpretper_from_model_and_config failed !")
242
+ return None
243
+ result = interpreter.init()
244
+ if result != 0:
245
+ print(f"interpreter init failed !")
246
+ return False
247
+ result = interpreter.load_model()
248
+ if result != 0:
249
+ print("interpreter load model failed !")
250
+ return False
251
+ print("detect model load success!")
252
+
253
+ # image process
254
+ frame = cv2.imread(imgs)
255
+ # Pad the image to a square, then resize (aspect ratio preserved)
256
+ img_processed = np.copy(frame)
257
+ [height, width, _] = img_processed.shape
258
+ length = max((height, width))
259
+ scale = length / size
260
+ ratio=[scale,scale]
261
+ image = np.zeros((length, length, 3), np.uint8)
262
+ image[0:height, 0:width] = img_processed
263
+ img_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
264
+ img_input=cv2.resize(img_input,(size,size))
265
+
266
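+ # normalize pixel values to [0, 1] (mean 0, std 255)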
+ mean_data=[0, 0, 0]
267
+ std_data=[255, 255, 255]
268
+ img_input = (img_input-mean_data)/std_data # HWC
269
+
270
+ img_input = img_input.astype(np.float32)
271
+
272
+
273
+ # qnn run
274
+ invoke_time=[]
275
+ for i in range(invoke_nums):
276
+ result = interpreter.set_input_tensor(0, img_input.data)
277
+ if result != 0:
278
+ print("interpreter set_input_tensor() failed")
279
+
280
+ t1=time.time()
281
+ result = interpreter.invoke()
282
+ cost_time = (time.time()-t1)*1000
283
+ invoke_time.append(cost_time)
284
+
285
+ if result != 0:
286
+ print("interpreter set_input_tensor() failed")
287
+ stride8 = interpreter.get_output_tensor(0)
288
+ stride16 = interpreter.get_output_tensor(1)
289
+ stride32 = interpreter.get_output_tensor(2)
290
+
291
+
292
+ result = interpreter.destory()
293
+
294
+ ## timing statistics
295
+ max_invoke_time = max(invoke_time)
296
+ min_invoke_time = min(invoke_time)
297
+ mean_invoke_time = sum(invoke_time)/invoke_nums
298
+ var_invoketime=np.var(invoke_time)
299
+ print("=======================================")
300
+ print(f"QNN inference {invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
301
+ print("=======================================")
302
+
303
+ ## post-processing
304
+ stride = [8, 16, 32]
305
+ yolo_head = Detect(OBJ_CLASS_NUM, anchors, stride, MODEL_SIZE)
306
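+ # the code assumes output tensor 0 is the 80x80 (stride-8) map, hence the reshape with output_shapes[2]; NHWC outputs are transposed to NCHW for the Detect head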
+ validCount0 = stride8.reshape(*output_shapes[2]).transpose(0, 3, 1, 2)
307
+ validCount1 = stride16.reshape(*output_shapes[1]).transpose(0, 3, 1, 2)
308
+ validCount2 = stride32.reshape(*output_shapes[0]).transpose(0, 3, 1, 2)
309
+ pred = yolo_head([validCount0, validCount1, validCount2])
310
+ det_pred = detect_postprocess(pred, frame.shape, [MODEL_SIZE, MODEL_SIZE, 3], conf_thres=0.5, iou_thres=0.45)
311
+ det_pred[np.isnan(det_pred)] = 0.0
312
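+ # map boxes from the 640x640 letterboxed input back to original-image coordinates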
+ det_pred[:, :4] = det_pred[:, :4] * scale
313
+ res_img = draw_detect_res(frame, det_pred)
314
+
315
+ save_path=os.path.join(current_p,"result.jpg")
316
+ cv2.imwrite(save_path, res_img)
317
+ print("图片保存在",save_path)
318
+ print("=======================================")
319
+
320
+ return True
321
+
322
+
323
+
324
+
325
+ image_path = os.path.join(current_p,"bus.jpg")
326
+ def parser_args():
327
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
328
+ parser.add_argument('--target_model',type=str,default=os.path.join(current_p,'../models/cutoff_yolov5n_w8a8.qnn216.ctx.bin'),help="inference model path")
329
+ parser.add_argument('--imgs',type=str,default=image_path,help="Predict images path")
330
+ parser.add_argument('--invoke_nums',type=str,default=10,help="Inference nums")
331
+ parser.add_argument('--model_type',type=str,default='QNN',help="run backend")
332
+ parser.add_argument('--size',type=str,default=640,help="model input size")
333
+ args = parser.parse_args()
334
+ return args
335
+
336
+ if __name__ == "__main__":
337
+ main()
338
+