qc903113684 committed on
Commit b4805de · verified · 1 Parent(s): 4ae2b6c

Upload 121 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +11 -0
  2. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/README.md +63 -0
  3. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazebase.cpython-38.pyc +0 -0
  4. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazebase.cpython-39.pyc +0 -0
  5. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazeface.cpython-39.pyc +0 -0
  6. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazeface_landmark.cpython-39.pyc +0 -0
  7. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/visualization.cpython-38.pyc +0 -0
  8. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/visualization.cpython-39.pyc +0 -0
  9. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/CMakeLists.txt +34 -0
  10. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/anchors_float32.npy +3 -0
  11. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/coco.jpg +3 -0
  12. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/run_test.cpp +909 -0
  13. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/anchors_face_back.npy +3 -0
  14. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/blazeface_landmark.pth +3 -0
  15. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/blazefaceback.pth +3 -0
  16. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceDetctor.pt +3 -0
  17. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceDetctor_w8a16.qnn216.ctx.bin +3 -0
  18. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceLandmark.pt +3 -0
  19. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceLandmark_w8a16.qnn216.ctx.bin +3 -0
  20. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/0000.jpg +3 -0
  21. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazebase.cpython-38.pyc +0 -0
  22. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazebase.cpython-39.pyc +0 -0
  23. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazeface.cpython-39.pyc +0 -0
  24. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazeface_landmark.cpython-39.pyc +0 -0
  25. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/visualization.cpython-38.pyc +0 -0
  26. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/blazebase.py +513 -0
  27. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/blazeface.py +182 -0
  28. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/blazeface_landmark.py +74 -0
  29. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/coco.jpg +3 -0
  30. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/demo_qnn.py +389 -0
  31. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/export_jit.py +57 -0
  32. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/visualization.py +125 -0
  33. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/README.md +63 -0
  34. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt +34 -0
  35. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/anchors_float32.npy +3 -0
  36. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/coco.jpg +3 -0
  37. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp +909 -0
  38. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/models/m_faceDetector_w8a8.qnn216.ctx.bin +3 -0
  39. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/models/m_faceLandmark_w8a8.qnn216.ctx.bin +3 -0
  40. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/anchors_face_back.npy +3 -0
  41. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/anchors_float32.npy +3 -0
  42. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/blazebase.py +513 -0
  43. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/blazeface.py +182 -0
  44. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/blazeface_landmark.py +74 -0
  45. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/coco.jpg +3 -0
  46. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py +424 -0
  47. model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/visualization.py +125 -0
  48. model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/README.md +63 -0
  49. model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/__pycache__/blazebase.cpython-38.pyc +0 -0
  50. model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/__pycache__/blazebase.cpython-39.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/coco.jpg filter=lfs diff=lfs merge=lfs -text
37
+ model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/0000.jpg filter=lfs diff=lfs merge=lfs -text
38
+ model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/coco.jpg filter=lfs diff=lfs merge=lfs -text
39
+ model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/coco.jpg filter=lfs diff=lfs merge=lfs -text
40
+ model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/coco.jpg filter=lfs diff=lfs merge=lfs -text
41
+ model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/cpp/coco.jpg filter=lfs diff=lfs merge=lfs -text
42
+ model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/python/coco.jpg filter=lfs diff=lfs merge=lfs -text
43
+ model_farm_mediapipefacedetection_qcs8550_qnn2.16_int8_aidlite/cpp/coco.jpg filter=lfs diff=lfs merge=lfs -text
44
+ model_farm_mediapipefacedetection_qcs8550_qnn2.16_int8_aidlite/python/coco.jpg filter=lfs diff=lfs merge=lfs -text
45
+ model_farm_mediapipefacedetection_qcs8550_qnn2.16_w8a16_aidlite/cpp/coco.jpg filter=lfs diff=lfs merge=lfs -text
46
+ model_farm_mediapipefacedetection_qcs8550_qnn2.16_w8a16_aidlite/python/coco.jpg filter=lfs diff=lfs merge=lfs -text
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/README.md ADDED
@@ -0,0 +1,63 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: 256x256
4
+ - Number of parameters: 0.13M, 0.6M
5
+ - Model size: 0.58MB, 2.32MB
6
+ - Output shape: [1x896x16, 1x896x1], [1, 1x468x3]
7
+
8
+ Source model repository: [MediaPipe-Face-Detection](https://github.com/zmurez/MediaPipePyTorch/)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT16
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - Install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Install the QNN package that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # Check the AidLite C++ SDK version
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # Check the AidLite Python SDK version
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### c++
49
+ ```bash
50
+ # The cnpy library is required to load the .npy files (run these commands from the default terminal directory)
51
+ git clone https://github.com/rogersce/cnpy.git
52
+ cd cnpy
53
+ mkdir build && cd build
54
+ cmake ..
55
+ make
56
+ sudo make install
57
+
58
+ cd mediapipe-face-detection/model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp
59
+ mkdir build && cd build
60
+ cmake ..
61
+ make
62
+ ./run_test
63
+ ```
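
The detector's 1x896x16 output is decoded against 896 SSD-style anchors and the 1x896x1 logits are squashed into scores before NMS; the C++ post-processing in cpp/run_test.cpp further down in this diff does this with x/y/w/h scales of 256 and 6 keypoints. A minimal NumPy sketch of that decoding step, assuming `raw_boxes` of shape (896, 16), `raw_scores` of shape (896,) and `anchors` of shape (896, 4) in (x_center, y_center, w, h) layout:

```python
import numpy as np

def decode_boxes(raw_boxes, anchors, scale=256.0, num_keypoints=6):
    # Mirrors decode_boxes() in cpp/run_test.cpp: each decoded row is
    # (ymin, xmin, ymax, xmax, kp0_x, kp0_y, ..., kp5_x, kp5_y).
    boxes = np.zeros_like(raw_boxes, dtype=np.float32)
    x_c = raw_boxes[:, 0] / scale * anchors[:, 2] + anchors[:, 0]
    y_c = raw_boxes[:, 1] / scale * anchors[:, 3] + anchors[:, 1]
    w = raw_boxes[:, 2] / scale * anchors[:, 2]
    h = raw_boxes[:, 3] / scale * anchors[:, 3]
    boxes[:, 0], boxes[:, 1] = y_c - h / 2.0, x_c - w / 2.0
    boxes[:, 2], boxes[:, 3] = y_c + h / 2.0, x_c + w / 2.0
    for k in range(num_keypoints):
        o = 4 + 2 * k
        boxes[:, o] = raw_boxes[:, o] / scale * anchors[:, 2] + anchors[:, 0]
        boxes[:, o + 1] = raw_boxes[:, o + 1] / scale * anchors[:, 3] + anchors[:, 1]
    return boxes

def decode_scores(raw_scores, clip=100.0):
    # Clipped sigmoid, as in tensors_to_detections(); the demo keeps scores >= 0.4.
    return 1.0 / (1.0 + np.exp(-np.clip(raw_scores, -clip, clip)))
```
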
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazebase.cpython-38.pyc ADDED
Binary file (16.5 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazebase.cpython-39.pyc ADDED
Binary file (16.5 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazeface.cpython-39.pyc ADDED
Binary file (3.95 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/blazeface_landmark.cpython-39.pyc ADDED
Binary file (2.07 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/visualization.cpython-38.pyc ADDED
Binary file (4.54 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/__pycache__/visualization.cpython-39.pyc ADDED
Binary file (3.92 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,34 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+ find_library(CNPY_LIB cnpy REQUIRED)
6
+
7
+ message(STATUS "OpenCV Library status:")
8
+ message(STATUS "  Version: ${OpenCV_VERSION}")
9
+ message(STATUS "  Include: ${OpenCV_INCLUDE_DIRS}")
10
+
11
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
12
+
13
+ include_directories(
14
+ /usr/local/include
15
+ /usr/include/opencv4
16
+ )
17
+
18
+ link_directories(
19
+ /usr/local/lib/
20
+ )
21
+
22
+ file(GLOB SRC_LISTS
23
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
24
+ )
25
+
26
+ add_executable(run_test ${SRC_LISTS})
27
+
28
+ target_link_libraries(run_test
29
+ aidlite
30
+ ${OpenCV_LIBS}
31
+ pthread
32
+ jsoncpp
33
+ ${CNPY_LIB}
34
+ )
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/anchors_float32.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79fa15d63ca4c37eaa953ddb462623e52b07f9af52be5deb7b935b8d3c3d7d94
3
+ size 14464
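
This pointer resolves to the 896 anchor boxes consumed by the decoding step: 896 rows of 4 float32 values (x_center, y_center, w, h) is 14,336 bytes of data, consistent with the 14,464-byte object once the .npy header is included. A quick sanity check with NumPy (hypothetical local path, assuming the file has been pulled from LFS):

```python
import numpy as np

# Path is relative to the int16 model folder; adjust to your checkout.
anchors = np.load("cpp/anchors_float32.npy")
assert anchors.shape == (896, 4)
print(anchors.dtype, anchors[:2])  # expected float32, rows of (x_center, y_center, w, h)
```
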
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/coco.jpg ADDED

Git LFS Details

  • SHA256: f7d96313cff4ba7511af36d8dbc358dd33b2815ec378680a09b97353cadb2378
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,909 @@
1
+ #include <iostream>
2
+ #include <fstream>
3
+ #include <opencv2/opencv.hpp>
4
+ #include <aidlux/aidlite/aidlite.hpp>
5
+ #include <vector>
6
+ #include <numeric>
7
+ #include <cmath>
8
+ #include <jsoncpp/json/json.h>
9
+ #include <tuple>
10
+ #include <algorithm>
11
+ #include <sstream>
12
+ #include <string>
13
+ #include <cassert>
+ #include <chrono>  // std::chrono is used below for timing
14
+ #include "cnpy.h"
15
+
16
+ using namespace cv;
17
+ using namespace std;
18
+ using namespace Aidlux::Aidlite;
19
+
20
+
21
+ // Face landmark connection indices (from MediaPipe Face Mesh)
22
+ const std::vector<std::pair<int, int>> FACE_CONNECTIONS = {
23
+ {61, 146}, {146, 91}, {91, 181}, {181, 84}, {84, 17},
24
+ {17, 314}, {314, 405}, {405, 321}, {321, 375}, {375, 291},
25
+ {61, 185}, {185, 40}, {40, 39}, {39, 37}, {37, 0},
26
+ {0, 267}, {267, 269}, {269, 270}, {270, 409}, {409, 291},
27
+ {78, 95}, {95, 88}, {88, 178}, {178, 87}, {87, 14},
28
+ {14, 317}, {317, 402}, {402, 318}, {318, 324}, {324, 308},
29
+ {78, 191}, {191, 80}, {80, 81}, {81, 82}, {82, 13},
30
+ {13, 312}, {312, 311}, {311, 310}, {310, 415}, {415, 308},
31
+ {263, 249}, {249, 390}, {390, 373}, {373, 374}, {374, 380},
32
+ {380, 381}, {381, 382}, {382, 362}, {263, 466}, {466, 388},
33
+ {388, 387}, {387, 386}, {386, 385}, {385, 384}, {384, 398},
34
+ {398, 362}, {276, 283}, {283, 282}, {282, 295}, {295, 285},
35
+ {300, 293}, {293, 334}, {334, 296}, {296, 336}, {33, 7},
36
+ {7, 163}, {163, 144}, {144, 145}, {145, 153}, {153, 154},
37
+ {154, 155}, {155, 133}, {33, 246}, {246, 161}, {161, 160},
38
+ {160, 159}, {159, 158}, {158, 157}, {157, 173}, {173, 133},
39
+ {46, 53}, {53, 52}, {52, 65}, {65, 55}, {70, 63}, {63, 105},
40
+ {105, 66}, {66, 107}, {10, 338}, {338, 297}, {297, 332},
41
+ {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454},
42
+ {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365},
43
+ {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152},
44
+ {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136},
45
+ {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234},
46
+ {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103},
47
+ {103, 67}, {67, 109}, {109, 10}
48
+ };
49
+
50
+ struct Args {
51
+ std::string faceDetector_model = "../../models/m_faceDetctor_w8a16.qnn216.ctx.bin";
52
+ std::string faceLandmark_model = "../../models/m_faceLandmark_w8a16.qnn216.ctx.bin";
53
+ std::string imgs = "../coco.jpg";
54
+ int invoke_nums = 10;
55
+ std::string model_type = "QNN";
56
+ };
57
+
58
+
59
+ Args parse_args(int argc, char* argv[]) {
60
+ Args args;
61
+ for (int i = 1; i < argc; ++i) {
62
+ std::string arg = argv[i];
63
+ if (arg == "--faceDetector_model" && i + 1 < argc) {
64
+ args.faceDetector_model = argv[++i];
65
+ } else if (arg == "--faceLandmark_model" && i + 1 < argc) {
66
+ args.faceLandmark_model = argv[++i];
67
+ } else if (arg == "--imgs" && i + 1 < argc) {
68
+ args.imgs = argv[++i];
69
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
70
+ args.invoke_nums = std::stoi(argv[++i]);
71
+ } else if (arg == "--model_type" && i + 1 < argc) {
72
+ args.model_type = argv[++i];
73
+ }
74
+ }
75
+ return args;
76
+ }
77
+
78
+ std::string to_lower(const std::string& str) {
79
+ std::string lower_str = str;
80
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
81
+ return std::tolower(c);
82
+ });
83
+ return lower_str;
84
+ }
85
+
86
+ std::vector<std::vector<float>> load_anchors_from_npy(const std::string& path) {
87
+ cnpy::NpyArray arr = cnpy::npy_load(path);
88
+ float* data_ptr = arr.data<float>();
89
+
90
+ size_t num_rows = arr.shape[0]; // 896
91
+ size_t num_cols = arr.shape[1]; // 4
92
+
93
+ std::vector<std::vector<float>> anchors(num_rows, std::vector<float>(num_cols));
94
+ for (size_t i = 0; i < num_rows; ++i) {
95
+ for (size_t j = 0; j < num_cols; ++j) {
96
+ anchors[i][j] = data_ptr[i * num_cols + j];
97
+ }
98
+ }
99
+
100
+ return anchors;
101
+ }
102
+
103
+
104
+ // Draw face keypoints and connection lines
105
+ void draw_landmarks(
106
+ cv::Mat& img,
107
+ const std::vector<cv::Point2f>& points,
108
+ const std::vector<float>& flags,
109
+ const std::vector<std::pair<int, int>>& connections,
110
+ float threshold = 0.4f,
111
+ cv::Scalar point_color = cv::Scalar(0, 255, 0),
112
+ cv::Scalar line_color = cv::Scalar(0, 0, 0),
113
+ int size = 2)
114
+ {
115
+ // Draw keypoints
116
+ for (size_t i = 0; i < points.size(); ++i) {
117
+ // if (i < flags.size() && flags[i] > threshold) {
118
+ int x = static_cast<int>(points[i].x);
119
+ int y = static_cast<int>(points[i].y);
120
+ cv::circle(img, cv::Point(x, y), size, point_color, size);
121
+ // }
122
+ }
123
+
124
+ // Draw connection lines (both endpoints should be visible)
125
+ for (const auto& conn : connections) {
126
+ int i0 = conn.first;
127
+ int i1 = conn.second;
128
+ // if (i0 < points.size() && i1 < points.size() &&
129
+ // i0 < flags.size() && i1 < flags.size() &&
130
+ // flags[i0] > threshold && flags[i1] > threshold)
131
+ // {
132
+ cv::line(img, points[i0], points[i1], line_color, size);
133
+ // }
134
+ }
135
+ }
136
+
137
+
138
+ std::tuple<cv::Mat, cv::Mat, float, cv::Point> resize_pad(const cv::Mat& img) {
139
+ int orig_h = img.rows; // 480
140
+ int orig_w = img.cols; // 640
141
+
142
+ // Step 1: resize width to 256, keep aspect ratio
143
+ int w1 = 256;
144
+ int h1 = w1 * orig_h / orig_w; // equivalent to int(256 * h / w)
145
+
146
+ // Step 2: compute padding in height direction
147
+ int padh = 256 - h1;
148
+ int padw = 0;
149
+
150
+ int padh1 = padh / 2;
151
+ int padh2 = padh1 + (padh % 2);
152
+ int padw1 = padw / 2;
153
+ int padw2 = padw1 + (padw % 2);
154
+
155
+ // Step 3: resize to (w1, h1)
156
+ cv::Mat resized;
157
+ cv::resize(img, resized, cv::Size(w1, h1)); // (256, h1)
158
+
159
+ // Step 4: pad to (256, 256)
160
+ cv::Mat padded;
161
+ cv::copyMakeBorder(resized, padded, padh1, padh2, padw1, padw2, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
162
+
163
+ // Step 5: resize padded to 128×128
164
+ cv::Mat resized128;
165
+ cv::resize(padded, resized128, cv::Size(128, 128));
166
+
167
+ // Step 6: compute scale and pad in original image space
168
+ float scale = static_cast<float>(orig_h) / h1; // h / h1
169
+ cv::Point pad_point(static_cast<int>(padh1 * scale), static_cast<int>(padw1 * scale));
170
+
171
+ return std::make_tuple(padded, resized128, scale, pad_point);
172
+ }
173
+
174
+
175
+ // Convert the image to 1xCxHxW layout and normalize (divide by 255)
176
+ std::vector<float> preprocess_image(const cv::Mat& img) {
177
+ int H = img.rows;
178
+ int W = img.cols;
179
+ int C = img.channels(); // should be 3
180
+
181
+ std::vector<float> chw(H * W * C); // CHW
182
+ std::vector<float> nchw(1 * C * H * W); // NCHW
183
+
184
+ // 1. HWC → CHW + normalize (float32 / 255.0)
185
+ for (int h = 0; h < H; ++h) {
186
+ for (int w = 0; w < W; ++w) {
187
+ for (int c = 0; c < C; ++c) {
188
+ // OpenCV uses BGR order
189
+ float value = img.at<cv::Vec3b>(h, w)[c] / 255.0f;
190
+ chw[c * H * W + h * W + w] = value;
191
+ }
192
+ }
193
+ }
194
+
195
+ // 2. CHW → NCHW (add batch dimension, actually just copy)
196
+ for (int i = 0; i < C * H * W; ++i) {
197
+ nchw[i] = chw[i];
198
+ }
199
+
200
+ return nchw; // shape: [1, 3, H, W]
201
+ }
202
+
203
+
204
+ // Compute IoU from the first 4 coordinates only (the box is assumed to occupy the first 4)
205
+ float IoU(const std::vector<float>& box1, const std::vector<float>& box2) {
206
+ float x1 = std::max(box1[0], box2[0]);
207
+ float y1 = std::max(box1[1], box2[1]);
208
+ float x2 = std::min(box1[2], box2[2]);
209
+ float y2 = std::min(box1[3], box2[3]);
210
+
211
+ float inter_area = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
212
+ float box1_area = std::max(0.0f, box1[2] - box1[0]) * std::max(0.0f, box1[3] - box1[1]);
213
+ float box2_area = std::max(0.0f, box2[2] - box2[0]) * std::max(0.0f, box2[3] - box2[1]);
214
+ float union_area = box1_area + box2_area - inter_area;
215
+
216
+ return union_area > 0 ? inter_area / union_area : 0.0f;
217
+ }
218
+
219
+ std::vector<std::vector<float>> weighted_non_max_suppression(
220
+ std::vector<std::vector<float>>& detections,
221
+ int num_coords = 16,
222
+ float min_suppression_threshold = 0.3f)
223
+ {
224
+ if (detections.empty()) return {};
225
+
226
+ std::vector<int> indices(detections.size());
227
+ std::iota(indices.begin(), indices.end(), 0);
228
+
229
+ // Sort by confidence in descending order
230
+ std::sort(indices.begin(), indices.end(), [&](int a, int b) {
231
+ return detections[a][num_coords] > detections[b][num_coords];
232
+ });
233
+
234
+ std::vector<std::vector<float>> output;
235
+
236
+ while (!indices.empty()) {
237
+ int best_idx = indices.front();
238
+ const auto& best_det = detections[best_idx];
239
+ std::vector<int> overlapping = { best_idx };
240
+
241
+ for (size_t i = 1; i < indices.size(); ++i) {
242
+ float iou = IoU(best_det, detections[indices[i]]);
243
+ if (iou > min_suppression_threshold) {
244
+ overlapping.push_back(indices[i]);
245
+ }
246
+ }
247
+
248
+ // Update the remaining indices
249
+ std::vector<int> new_indices;
250
+ for (int idx : indices) {
251
+ if (std::find(overlapping.begin(), overlapping.end(), idx) == overlapping.end()) {
252
+ new_indices.push_back(idx);
253
+ }
254
+ }
255
+ indices = new_indices;
256
+
257
+ // Weighted average: coordinate * confidence
258
+ if (overlapping.size() == 1) {
259
+ output.push_back(best_det);
260
+ } else {
261
+ std::vector<float> weighted(num_coords + 1, 0.0f);
262
+ float total_score = 0.0f;
263
+
264
+ for (int idx : overlapping) {
265
+ float score = detections[idx][num_coords];
266
+ total_score += score;
267
+ for (int k = 0; k < num_coords; ++k) {
268
+ weighted[k] += detections[idx][k] * score;
269
+ }
270
+ }
271
+
272
+ for (int k = 0; k < num_coords; ++k) {
273
+ weighted[k] /= total_score;
274
+ }
275
+ weighted[num_coords] = total_score / overlapping.size(); // average score
276
+
277
+ // std::cout << "Weighted box: ";
278
+ // for (float v : weighted) std::cout << v << " ";
279
+ // std::cout << "\n";
280
+
281
+ output.push_back(weighted);
282
+ }
283
+ }
284
+
285
+ // TODO
286
+ auto x = output[0];
287
+ output.clear();
288
+ output.push_back(x);
289
+
290
+ return output;
291
+ }
292
+
293
+
294
+ std::vector<std::vector<float>> denormalize_detections(
295
+ const std::vector<std::vector<float>>& detections,
296
+ float scale,
297
+ const cv::Point& pad
298
+ ) {
299
+ std::vector<std::vector<float>> result = detections;
300
+
301
+ for (size_t i = 0; i < result.size(); ++i) {
302
+ std::vector<float>& det = result[i];
303
+
304
+ // bbox coords: x1, y1, x2, y2
305
+ det[0] = det[0] * scale * 256.0f - pad.x; // x1
306
+ det[1] = det[1] * scale * 256.0f - pad.y; // y1
307
+ det[2] = det[2] * scale * 256.0f - pad.x; // x2
308
+ det[3] = det[3] * scale * 256.0f - pad.y; // y2
309
+
310
+ // keypoints (starting from index 4): format [y, x, y, x, ...]
311
+ for (size_t k = 4; k + 1 < det.size(); k += 2) {
312
+ det[k] = det[k] * scale * 256.0f - pad.y; // y
313
+ det[k + 1] = det[k + 1] * scale * 256.0f - pad.x; // x
314
+ }
315
+ }
316
+
317
+ return result;
318
+ }
319
+
320
+
321
+ void detection2roi(
322
+ const std::vector<std::vector<float>>& detections,
323
+ std::vector<float>& xc,
324
+ std::vector<float>& yc,
325
+ std::vector<float>& scale,
326
+ std::vector<float>& theta,
327
+ int kp1, int kp2, // keypoint indices
328
+ float dy, float dscale, float theta0
329
+ ) {
330
+ size_t N = detections.size();
331
+ xc.resize(N);
332
+ yc.resize(N);
333
+ scale.resize(N);
334
+ theta.resize(N);
335
+
336
+ for (size_t i = 0; i < N; ++i) {
337
+ const std::vector<float>& det = detections[i];
338
+
339
+ float x1 = det[1];
340
+ float x2 = det[3];
341
+ float y1 = det[0];
342
+ float y2 = det[2];
343
+
344
+ float x_center = (x1 + x2) / 2.0f;
345
+ float y_center = (y1 + y2) / 2.0f;
346
+ float box_scale = (x2 - x1); // assumes square box
347
+
348
+ // offset yc
349
+ y_center += dy * box_scale;
350
+ box_scale *= dscale;
351
+
352
+ // Get the positions of the two keypoints
353
+ int base = 4;
354
+ int idx_y0 = base + 2 * kp1;
355
+ int idx_x0 = base + 2 * kp1 + 1;
356
+ int idx_y1 = base + 2 * kp2;
357
+ int idx_x1 = base + 2 * kp2 + 1;
358
+
359
+ float x0 = det[idx_x0];
360
+ float y0 = det[idx_y0];
361
+ float x1_kp = det[idx_x1];
362
+ float y1_kp = det[idx_y1];
363
+
364
+ float angle = std::atan2(y0 - y1_kp, x0 - x1_kp) - theta0;
365
+
366
+ // Assign outputs
367
+ xc[i] = x_center;
368
+ yc[i] = y_center;
369
+ scale[i] = box_scale;
370
+ // TODO: theta should be adjusted to the actual use case
371
+ // theta[i] = angle; // use the computed angle if needed
372
+ theta[i] = -0.0094;
373
+ }
374
+ }
375
+
376
+
377
+ void extract_roi(
378
+ const cv::Mat& frame,
379
+ const std::vector<float>& xc,
380
+ const std::vector<float>& yc,
381
+ const std::vector<float>& theta,
382
+ const std::vector<float>& scale,
383
+ std::vector<cv::Mat>& cropped_rois,
384
+ std::vector<cv::Mat>& affine_matrices,
385
+ std::vector<std::vector<cv::Point2f>>& roi_boxes, // also return the box corner points
386
+ int resolution = 192
387
+ ) {
388
+ cropped_rois.clear();
389
+ affine_matrices.clear();
390
+ roi_boxes.clear();
391
+
392
+ for (size_t i = 0; i < xc.size(); ++i) {
393
+ float s = scale[i] / 2.0f;
394
+ float cos_t = std::cos(theta[i]);
395
+ float sin_t = std::sin(theta[i]);
396
+
397
+ // The 4 unit-square corners after the transform (same ordering as in the Python code)
398
+ std::vector<cv::Point2f> points(4);
399
+ // [-1, -1]
400
+ points[0].x = xc[i] + (-s * cos_t + s * sin_t);
401
+ points[0].y = yc[i] + (-s * sin_t - s * cos_t);
402
+ // [1, -1]
403
+ points[1].x = xc[i] + ( s * cos_t + s * sin_t);
404
+ points[1].y = yc[i] + ( s * sin_t - s * cos_t);
405
+ // [-1, 1]
406
+ points[2].x = xc[i] + (-s * cos_t - s * sin_t);
407
+ points[2].y = yc[i] + (-s * sin_t + s * cos_t);
408
+ // [1, 1]
409
+ points[3].x = xc[i] + ( s * cos_t - s * sin_t);
410
+ points[3].y = yc[i] + ( s * sin_t + s * cos_t);
411
+
412
+ // Compute the affine transform from the first three points
413
+ std::vector<cv::Point2f> src_pts = { points[0], points[1], points[2] };
414
+ std::vector<cv::Point2f> dst_pts = {
415
+ cv::Point2f(0, 0),
416
+ cv::Point2f(resolution - 1, 0),
417
+ cv::Point2f(0, resolution - 1)
418
+ };
419
+
420
+ cv::Mat M = cv::getAffineTransform(src_pts, dst_pts);
421
+ cv::Mat M_inv;
422
+ cv::invertAffineTransform(M, M_inv);
423
+
424
+ cv::Mat cropped;
425
+ cv::warpAffine(frame, cropped, M, cv::Size(resolution, resolution), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(127.5, 127.5, 127.5));
426
+ cropped_rois.push_back(cropped);
427
+ affine_matrices.push_back(M_inv);
428
+ roi_boxes.push_back(points); // store the transformed box corners
429
+ }
430
+ }
431
+
432
+ std::vector<float> preprocess_imgs_to_nchw(const std::vector<cv::Mat>& imgs) {
433
+ int N = imgs.size();
434
+ if (N == 0) return {};
435
+
436
+ int H = 192;
437
+ int W = 192;
438
+ int C = 3; // assume 3 channels (BGR)
439
+
440
+ std::vector<float> output;
441
+ output.reserve(N * C * H * W);
442
+
443
+ for (int n = 0; n < N; ++n) {
444
+ cv::Mat img_float;
445
+ imgs[n].convertTo(img_float, CV_32FC3, 1.0 / 255.0); // Normalize to [0,1]
446
+
447
+ // Split channels (HWC → CHW)
448
+ std::vector<cv::Mat> channels(3);
449
+ cv::split(img_float, channels); // channels[0] = B, [1] = G, [2] = R
450
+
451
+ for (int c = 0; c < C; ++c) {
452
+ for (int i = 0; i < H; ++i) {
453
+ for (int j = 0; j < W; ++j) {
454
+ output.push_back(channels[c].at<float>(i, j));
455
+ }
456
+ }
457
+ }
458
+ }
459
+
460
+ return output; // shape: N x C x H x W
461
+ }
462
+
463
+ // resolution is typically 192
464
+ std::vector<cv::Point2f> denormalize_landmarks(
465
+ const std::vector<float>& normalized_landmarks,
466
+ const std::vector<cv::Mat>& affines,
467
+ int resolution = 192)
468
+ {
469
+ std::vector<cv::Point2f> output;
470
+
471
+ // Check input sizes
472
+ const int num_faces = 1; // assume a single face
473
+ const int num_landmarks = 468;
474
+ if (normalized_landmarks.size() != num_faces * num_landmarks * 3 || affines.size() != num_faces) {
475
+ std::cerr << "Error: Input size mismatch. Expected "
476
+ << num_faces * num_landmarks * 3 << " landmarks and "
477
+ << num_faces << " affine matrices." << std::endl;
478
+ throw std::runtime_error("Input size mismatch");
479
+ }
480
+
481
+ for (int i = 0; i < num_faces; ++i) {
482
+ const cv::Mat& affine = affines[i]; // 2x3 CV_32F
483
+ for (int j = 0; j < num_landmarks; ++j) {
484
+ int idx = i * num_landmarks * 3 + j * 3;
485
+ float x = normalized_landmarks[idx + 0] * resolution;
486
+ float y = normalized_landmarks[idx + 1] * resolution;
487
+ // float z = normalized_landmarks[idx + 2]; // optional
488
+
489
+ // 2x1 input vector
490
+ cv::Mat pt = (cv::Mat_<float>(2, 1) << x, y);
491
+
492
+ // Extract the rotation and translation from the affine matrix
493
+ cv::Mat M2x2 = affine(cv::Rect(0, 0, 2, 2)).clone();
494
+ cv::Mat t2x1 = affine(cv::Rect(2, 0, 1, 2)).clone();
495
+ M2x2.convertTo(M2x2, CV_32F);
496
+ t2x1.convertTo(t2x1, CV_32F);
497
+
498
+ // Apply the inverse affine transform
499
+ cv::Mat out = M2x2 * pt + t2x1;
500
+
501
+ // Store as Point2f
502
+ output.emplace_back(out.at<float>(0, 0), out.at<float>(1, 0));
503
+ }
504
+ }
505
+
506
+ return output; // denormalized landmarks: 468 Point2f
507
+ }
508
+
509
+
510
+ void draw_roi(cv::Mat& img, const std::vector<std::vector<cv::Point2f>>& boxes) {
511
+ for (const auto& roi : boxes) {
512
+ if (roi.size() < 4) continue;
513
+
514
+ const cv::Point2f& p1 = roi[0];
515
+ const cv::Point2f& p2 = roi[1];
516
+ const cv::Point2f& p3 = roi[2];
517
+ const cv::Point2f& p4 = roi[3];
518
+
519
+ cv::line(img, p1, p2, cv::Scalar(0, 0, 0), 2); // black
520
+ cv::line(img, p1, p3, cv::Scalar(0, 255, 0), 2); // green
521
+ cv::line(img, p2, p4, cv::Scalar(0, 0, 0), 2); // black
522
+ cv::line(img, p3, p4, cv::Scalar(0, 0, 0), 2); // black
523
+ }
524
+ }
525
+
526
+
527
+ void draw_detections(cv::Mat& img, const std::vector<std::vector<float>>& detections, bool with_keypoints = true) {
528
+ for (const auto& det : detections) {
529
+ if (det.size() < 4) continue;
530
+
531
+ float ymin = det[0];
532
+ float xmin = det[1];
533
+ float ymax = det[2];
534
+ float xmax = det[3];
535
+
536
+ cv::rectangle(img, cv::Point(int(xmin), int(ymin)), cv::Point(int(xmax), int(ymax)), cv::Scalar(255, 0, 0), 1);
537
+
538
+ if (with_keypoints && det.size() > 4) {
539
+ int n_keypoints = (det.size() - 4) / 2;
540
+ for (int k = 0; k < n_keypoints; ++k) {
541
+ int kp_x = int(det[4 + k * 2]);
542
+ int kp_y = int(det[4 + k * 2 + 1]);
543
+ cv::circle(img, cv::Point(kp_x, kp_y), 2, cv::Scalar(0, 0, 255), 2);
544
+ }
545
+ }
546
+ }
547
+ }
548
+
549
+
550
+ std::vector<std::vector<float>> loadAnchors(const std::string& filename) {
551
+ std::ifstream in(filename);
552
+ std::vector<std::vector<float>> anchors;
553
+
554
+ if (!in.is_open()) {
555
+ std::cerr << "Failed to open file: " << filename << std::endl;
556
+ return anchors;
557
+ }
558
+
559
+ std::string line;
560
+ while (std::getline(in, line)) {
561
+ std::istringstream ss(line);
562
+ std::vector<float> anchor;
563
+ float value;
564
+ while (ss >> value) {
565
+ anchor.push_back(value);
566
+ }
567
+ if (!anchor.empty()) {
568
+ anchors.push_back(anchor);
569
+ }
570
+ }
571
+
572
+ in.close();
573
+ return anchors;
574
+ }
575
+
576
+ // sigmoid function
577
+ float sigmoid(float x) {
578
+ return 1.0f / (1.0f + std::exp(-x));
579
+ }
580
+
581
+ // clamp function
582
+ float clamp(float x, float min_val, float max_val) {
583
+ return std::max(min_val, std::min(max_val, x));
584
+ }
585
+
586
+ // shape: [batch, num_anchors, num_coords] => [batch][anchor][coord]
587
+ std::vector<std::vector<std::vector<float>>> decode_boxes(
588
+ const std::vector<float>& raw_boxes,
589
+ const std::vector<std::vector<float>>& anchors,
590
+ int batch, int num_anchors, int num_coords,
591
+ float x_scale, float y_scale, float w_scale, float h_scale,
592
+ int num_keypoints)
593
+ {
594
+ std::vector<std::vector<std::vector<float>>> decoded_boxes(batch,
595
+ std::vector<std::vector<float>>(num_anchors, std::vector<float>(num_coords, 0)));
596
+
597
+ for (int b = 0; b < batch; ++b) {
598
+ for (int i = 0; i < num_anchors; ++i) {
599
+ int base = b * num_anchors * num_coords + i * num_coords;
600
+
601
+ float x_center = raw_boxes[base + 0] / x_scale * anchors[i][2] + anchors[i][0];
602
+ float y_center = raw_boxes[base + 1] / y_scale * anchors[i][3] + anchors[i][1];
603
+ float w = raw_boxes[base + 2] / w_scale * anchors[i][2];
604
+ float h = raw_boxes[base + 3] / h_scale * anchors[i][3];
605
+
606
+ decoded_boxes[b][i][0] = y_center - h / 2.0f; // ymin
607
+ decoded_boxes[b][i][1] = x_center - w / 2.0f; // xmin
608
+ decoded_boxes[b][i][2] = y_center + h / 2.0f; // ymax
609
+ decoded_boxes[b][i][3] = x_center + w / 2.0f; // xmax
610
+
611
+ for (int k = 0; k < num_keypoints; ++k) {
612
+ int offset = 4 + k * 2;
613
+ float keypoint_x = raw_boxes[base + offset] / x_scale * anchors[i][2] + anchors[i][0];
614
+ float keypoint_y = raw_boxes[base + offset + 1] / y_scale * anchors[i][3] + anchors[i][1];
615
+ decoded_boxes[b][i][offset] = keypoint_x;
616
+ decoded_boxes[b][i][offset + 1] = keypoint_y;
617
+ }
618
+ }
619
+ }
620
+
621
+ return decoded_boxes;
622
+ }
623
+
624
+ std::vector<std::vector<std::vector<float>>> tensors_to_detections(
625
+ const std::vector<float>& raw_box_tensor,
626
+ const std::vector<float>& raw_score_tensor,
627
+ const std::vector<std::vector<float>>& anchors,
628
+ int batch, int num_anchors, int num_coords, int num_classes, int num_keypoints,
629
+ float x_scale, float y_scale, float w_scale, float h_scale,
630
+ float score_clipping_thresh, float min_score_thresh)
631
+ {
632
+ assert(raw_box_tensor.size() == batch * num_anchors * num_coords);
633
+ assert(raw_score_tensor.size() == batch * num_anchors * num_classes);
634
+ assert(anchors.size() == size_t(num_anchors));
635
+
636
+ auto detection_boxes = decode_boxes(
637
+ raw_box_tensor, anchors, batch, num_anchors, num_coords,
638
+ x_scale, y_scale, w_scale, h_scale, num_keypoints);
639
+
640
+ std::vector<std::vector<std::vector<float>>> output_detections;
641
+
642
+ for (int b = 0; b < batch; ++b) {
643
+ std::vector<std::vector<float>> detections;
644
+
645
+ for (int i = 0; i < num_anchors; ++i) {
646
+ int score_index = b * num_anchors * num_classes + i * num_classes;
647
+
648
+ // Single-class case: take class 0
649
+ float score_raw = raw_score_tensor[score_index];
650
+ float score = sigmoid(clamp(score_raw, -score_clipping_thresh, score_clipping_thresh));
651
+
652
+ if (score >= min_score_thresh) {
653
+ std::vector<float> det = detection_boxes[b][i]; // shape [num_coords]
654
+ det.push_back(score); // append the confidence
655
+ detections.push_back(det); // shape [num_coords+1]
656
+ }
657
+ }
658
+
659
+ output_detections.push_back(detections); // one vector per batch
660
+ }
661
+
662
+ return output_detections;
663
+ }
664
+
665
+
666
+ int invoke(const Args& args) {
667
+ std::cout << "Start main ... ... Model Path: " << args.faceDetector_model << "\n"
668
+ << args.faceLandmark_model << "\n"
669
+ << "Image Path: " << args.imgs << "\n"
670
+ << "Inference Nums: " << args.invoke_nums << "\n"
671
+ << "Model Type: " << args.model_type << "\n";
672
+ // =============================================================faceDetector_model start
673
+ Model* model1 = Model::create_instance(args.faceDetector_model);
674
+ if(model1 == nullptr){
675
+ printf("Create model1 failed !\n");
676
+ return EXIT_FAILURE;
677
+ }
678
+ Config* config1 = Config::create_instance();
679
+ if(config1 == nullptr){
680
+ printf("Create config1 failed !\n");
681
+ return EXIT_FAILURE;
682
+ }
683
+ config1->implement_type = ImplementType::TYPE_LOCAL;
684
+ std::string model_type_lower1 = to_lower(args.model_type);
685
+ if (model_type_lower1 == "qnn"){
686
+ config1->framework_type = FrameworkType::TYPE_QNN;
687
+ } else if (model_type_lower1 == "snpe2" || model_type_lower1 == "snpe") {
688
+ config1->framework_type = FrameworkType::TYPE_SNPE2;
689
+ }
690
+ config1->accelerate_type = AccelerateType::TYPE_DSP;
691
+ config1->is_quantify_model = 1;
692
+
693
+ std::vector<std::vector<uint32_t>> input_shapes1 = {{1,3,256,256}};
694
+ std::vector<std::vector<uint32_t>> output_shapes1 = {{1,896,16},{1,896,1}};
695
+ model1->set_model_properties(input_shapes1, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes1, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
696
+ std::unique_ptr<Interpreter> fast_interpreter1 = InterpreterBuilder::build_interpretper_from_model_and_config(model1, config1);
697
+ if(fast_interpreter1 == nullptr){
698
+ printf("build_interpretper_from_model_and_config failed !\n");
699
+ return EXIT_FAILURE;
700
+ }
701
+ int result = fast_interpreter1->init();
702
+ if(result != EXIT_SUCCESS){
703
+ printf("interpreter->init() failed !\n");
704
+ return EXIT_FAILURE;
705
+ }
706
+ // load model
707
+ result = fast_interpreter1->load_model();
708
+ if(result != EXIT_SUCCESS){
709
+ printf("interpreter->load_model() failed !\n");
710
+ return EXIT_FAILURE;
711
+ }
712
+ printf("detect model load success!\n");
713
+ // =============================================================faceDetector_model over
714
+
715
+ // =============================================================faceLandmark_model start
716
+ Model* model2 = Model::create_instance(args.faceLandmark_model);
717
+ if(model2 == nullptr){
718
+ printf("Create model2 failed !\n");
719
+ return EXIT_FAILURE;
720
+ }
721
+ Config* config2 = Config::create_instance();
722
+ if(config2 == nullptr){
723
+ printf("Create config2 failed !\n");
724
+ return EXIT_FAILURE;
725
+ }
726
+ config2->implement_type = ImplementType::TYPE_LOCAL;
727
+ std::string model_type_lower2 = to_lower(args.model_type);
728
+ if (model_type_lower2 == "qnn"){
729
+ config2->framework_type = FrameworkType::TYPE_QNN;
730
+ } else if (model_type_lower2 == "snpe2" || model_type_lower2 == "snpe") {
731
+ config2->framework_type = FrameworkType::TYPE_SNPE2;
732
+ }
733
+ config2->accelerate_type = AccelerateType::TYPE_DSP;
734
+ config2->is_quantify_model = 1;
735
+
736
+ std::vector<std::vector<uint32_t>> input_shapes2 = {{1,3,192,192}};
737
+ std::vector<std::vector<uint32_t>> output_shapes2 = {{1},{1,468,3}};
738
+ model2->set_model_properties(input_shapes2, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes2, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
739
+ std::unique_ptr<Interpreter> fast_interpreter2 = InterpreterBuilder::build_interpretper_from_model_and_config(model2, config2);
740
+ if(fast_interpreter2 == nullptr){
741
+ printf("build_interpretper_from_model_and_config2 failed !\n");
742
+ return EXIT_FAILURE;
743
+ }
744
+ result = fast_interpreter2->init();
745
+ if(result != EXIT_SUCCESS){
746
+ printf("interpreter2->init() failed !\n");
747
+ return EXIT_FAILURE;
748
+ }
749
+ // load model
750
+ result = fast_interpreter2->load_model();
751
+ if(result != EXIT_SUCCESS){
752
+ printf("interpreter2->load_model() failed !\n");
753
+ return EXIT_FAILURE;
754
+ }
755
+ printf("detect model2 load success!\n");
756
+ // =============================================================faceLandmark_model over
757
+
758
+
759
+ auto anchors = load_anchors_from_npy("../anchors_float32.npy");
760
+ cv::Mat frame = cv::imread(args.imgs);
761
+ if (frame.empty()) {
762
+ printf("detect image load failed!\n");
763
+ return 1;
764
+ }
765
+ // printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
766
+ cv::Mat input_data;
767
+ cv::Mat frame_clone1 = frame.clone();
768
+ cv::cvtColor(frame_clone1, frame_clone1, cv::COLOR_BGR2RGB);
769
+ cv::Mat frame_clone = frame.clone();
770
+
771
+
772
+ cv::Mat img1, img2;
773
+ float scale;
774
+ cv::Point pad;
775
+ std::tie(img1, img2, scale, pad) = resize_pad(frame_clone1);
776
+ std::vector<float> input_tensor = preprocess_image(img1);
777
+
778
+ float *outdata0 = nullptr;
779
+ float *outdata1 = nullptr;
780
+ std::vector<float> invoke_time;
781
+ for (int i = 0; i < args.invoke_nums; ++i) {
782
+ result = fast_interpreter1->set_input_tensor(0, input_tensor.data());
783
+ if(result != EXIT_SUCCESS){
784
+ printf("interpreter->set_input_tensor() failed !\n");
785
+ return EXIT_FAILURE;
786
+ }
787
+ auto t1 = std::chrono::high_resolution_clock::now();
788
+ result = fast_interpreter1->invoke();
789
+ auto t2 = std::chrono::high_resolution_clock::now();
790
+ std::chrono::duration<double> cost_time = t2 - t1;
791
+ invoke_time.push_back(cost_time.count() * 1000);
792
+ if(result != EXIT_SUCCESS){
793
+ printf("interpreter->invoke() failed !\n");
794
+ return EXIT_FAILURE;
795
+ }
796
+ uint32_t out_data_0 = 0;
797
+ result = fast_interpreter1->get_output_tensor(0, (void**)&outdata0, &out_data_0);
798
+ if(result != EXIT_SUCCESS){
799
+ printf("interpreter1->get_output_tensor() 0 failed !\n");
800
+ return EXIT_FAILURE;
801
+ }
802
+
803
+ uint32_t out_data_1 = 0;
804
+ result = fast_interpreter1->get_output_tensor(1, (void**)&outdata1, &out_data_1);
805
+ if(result != EXIT_SUCCESS){
806
+ printf("interpreter1->get_output_tensor() 1 failed !\n");
807
+ return EXIT_FAILURE;
808
+ }
809
+
810
+ }
811
+
812
+ std::vector<float> tensor_1_896_16(outdata0, outdata0 + 896*16);
813
+ std::vector<float> tensor_1_896_1(outdata1, outdata1 + 896*1);
814
+
815
+ std::vector<std::vector<std::vector<float>>> detections = tensors_to_detections(
816
+ tensor_1_896_16, tensor_1_896_1, anchors,
817
+ 1, 896, 16, 1, 6,
818
+ 256.0f, 256.0f, 256.0f, 256.0f,
819
+ 100.0f, 0.4f);
820
+
821
+
822
+ std::vector<std::vector<std::vector<float>>> filtered_detections;
823
+ for (size_t i = 0; i < detections.size(); ++i) {
824
+ std::vector<std::vector<float>>& dets = detections[i];
825
+ std::vector<std::vector<float>> faces = weighted_non_max_suppression(dets);
826
+ filtered_detections.push_back(faces);
827
+ }
828
+
829
+
830
+ // std::cout << "filtered_detections size: " << filtered_detections.size() << "\n";
831
+ // std::cout << "scale: " << scale << ", pad: (" << pad.x << ", " << pad.y << ")\n";
832
+ std::vector<std::vector<float>> face_detections = denormalize_detections(filtered_detections[0], scale, pad);
833
+
834
+ // std::cout << "face_detections size: " << face_detections.size() << "\n";
835
+ std::vector<float> xc, yc, scales, theta;
836
+ int kp1 = 0, kp2 = 1; // keypoint indices
837
+ float dy = 0.0f; // set according to the model definition
838
+ float dscale = 1.5f; // scale factor
839
+ float theta0 = 0.0f; // reference angle
840
+
841
+ detection2roi(face_detections, xc, yc, scales, theta, kp1, kp2, dy, dscale, theta0);
842
+ std::vector<cv::Mat> rois;
843
+ std::vector<cv::Mat> affines;
844
+ std::vector<std::vector<cv::Point2f>> boxes;
845
+
846
+ extract_roi(frame_clone1, xc, yc, theta, scales, rois, affines, boxes);
847
+ if (!boxes.empty()) {
848
+ std::cout << "Detected " << boxes.size() << " faces.\n";
849
+ // A face was detected; continue processing boxes[0] ...
850
+ std::vector<float> input_tensor = preprocess_imgs_to_nchw(rois);
851
+
852
+ float *outdata1_0 = nullptr;
853
+ float *outdata1_1 = nullptr;
854
+
855
+ result = fast_interpreter2->set_input_tensor(0, input_tensor.data());
856
+ if(result != EXIT_SUCCESS){
857
+ printf("interpreter2->set_input_tensor() failed !\n");
858
+ return EXIT_FAILURE;
859
+ }
860
+ auto t1 = std::chrono::high_resolution_clock::now();
861
+ result = fast_interpreter2->invoke();
862
+ auto t2 = std::chrono::high_resolution_clock::now();
863
+ std::chrono::duration<double> cost_time = t2 - t1;
864
+ invoke_time.push_back(cost_time.count() * 1000);
865
+ if(result != EXIT_SUCCESS){
866
+ printf("interpreter2->invoke() failed !\n");
867
+ return EXIT_FAILURE;
868
+ }
869
+ uint32_t out_data_1_0 = 0;
870
+ result = fast_interpreter2->get_output_tensor(0, (void**)&outdata1_0, &out_data_1_0);
871
+ if(result != EXIT_SUCCESS){
872
+ printf("interpreter2->get_output_tensor() 0 failed !\n");
873
+ return EXIT_FAILURE;
874
+ }
875
+
876
+ uint32_t out_data_1_1 = 0;
877
+ result = fast_interpreter2->get_output_tensor(1, (void**)&outdata1_1, &out_data_1_1);
878
+ if(result != EXIT_SUCCESS){
879
+ printf("interpreter2->get_output_tensor() 1 failed !\n");
880
+ return EXIT_FAILURE;
881
+ }
882
+
883
+ std::vector<float> flags(outdata1_0, outdata1_0 + 1);
884
+ std::vector<float> normalized_landmarks(outdata1_1, outdata1_1 + 468*3);
885
+
886
+ std::vector<cv::Point2f> landmarks = denormalize_landmarks(normalized_landmarks, affines);
887
+ draw_landmarks(frame_clone1, landmarks, flags, FACE_CONNECTIONS);
888
+ } else {
889
+ std::cout << "not detect face!" << std::endl;
890
+ }
891
+
892
+
893
+ draw_roi(frame_clone1, boxes);
894
+ draw_detections(frame_clone1, face_detections);
895
+ cv::cvtColor(frame_clone1, frame_clone1, cv::COLOR_RGB2BGR);
896
+ cv::imwrite("vis_result.jpg", frame_clone1);
897
+
898
+
899
+ fast_interpreter1->destory();
900
+ fast_interpreter2->destory();
901
+ return 0;
902
+
903
+ }
904
+
905
+
906
+ int main(int argc, char* argv[]) {
907
+ Args args = parse_args(argc, argv);
908
+ return invoke(args);
909
+ }
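
weighted_non_max_suppression() above merges overlapping candidates instead of discarding them: coordinates are averaged weighted by score and the group score becomes the mean. A compact NumPy sketch of the same merge, assuming `dets` is an (N, 17) array with columns 0-15 holding the decoded coordinates and column 16 the score:

```python
import numpy as np

def iou(a, b):
    # Overlap of two (ymin, xmin, ymax, xmax) boxes, as in IoU() in run_test.cpp.
    tl = np.maximum(a[:2], b[:2])
    br = np.minimum(a[2:4], b[2:4])
    inter = float(np.prod(np.clip(br - tl, 0.0, None)))
    area = lambda d: max(0.0, d[2] - d[0]) * max(0.0, d[3] - d[1])
    union = area(a) + area(b) - inter
    return inter / union if union > 0 else 0.0

def weighted_nms(dets, thresh=0.3):
    order = list(np.argsort(-dets[:, -1]))          # highest score first
    merged = []
    while order:
        best = dets[order[0]]
        group = [i for i in order if iou(best[:4], dets[i, :4]) > thresh]
        if order[0] not in group:
            group.append(order[0])                  # always keep the seed box
        order = [i for i in order if i not in group]
        scores = dets[group, -1]
        coords = (dets[group, :-1] * scores[:, None]).sum(axis=0) / scores.sum()
        merged.append(np.concatenate([coords, [scores.mean()]]))
    return merged
```

The C++ demo additionally keeps only the top merged result (the TODO block inside the function); the sketch returns all merged faces.
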
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/anchors_face_back.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10bb2fb93ab54ca426d6c750bfc3aad685028a16dcf231357d03694f261fd95
3
+ size 28800
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/blazeface_landmark.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c529987e67f82e58a608a394aabf245a3afa19ac2f761981894f70b4df9fdca
3
+ size 2439235
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/blazefaceback.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9658c6459c5d5450d7da9d5fbb74b3beca11157f4cdb35e4d948aa6b4efc0ded
3
+ size 594825
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceDetctor.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56336b04831d9f9f41bdcddcd4598e5660a2925451ee50da634fea6598ce6620
3
+ size 855238
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceDetctor_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06f7e7016506a415bb7e02aaf9469a5fd406d31bb7349d3ae0fe97f1a0cb3b9a
3
+ size 728616
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceLandmark.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96374d173e67c5c3690b75d030b729e23e41de6b1a1ebd5daef7ff3992118c54
3
+ size 2643322
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/models/m_faceLandmark_w8a16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61316298a6690650feea876b64b2efe520940d753af3264202689b12dd1c779e
3
+ size 1096800
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/0000.jpg ADDED

Git LFS Details

  • SHA256: f1b278e84efa32b0e25d982219d31438f74a73b58af62b7f4751df3076221078
  • Pointer size: 131 Bytes
  • Size of remote file: 174 kB
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazebase.cpython-38.pyc ADDED
Binary file (16.5 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazebase.cpython-39.pyc ADDED
Binary file (16.6 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazeface.cpython-39.pyc ADDED
Binary file (4.03 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/blazeface_landmark.cpython-39.pyc ADDED
Binary file (2.14 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/__pycache__/visualization.cpython-38.pyc ADDED
Binary file (4.6 kB).
 
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/blazebase.py ADDED
@@ -0,0 +1,513 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ def resize_pad(img):
9
+ """ resize and pad images to be input to the detectors
10
+
11
+ The face and palm detector networks take 256x256 and 128x128 images
12
+ as input. As such the input image is padded and resized to fit the
13
+ size while maintaining the aspect ratio.
14
+
15
+ Returns:
16
+ img1: 256x256
17
+ img2: 128x128
18
+ scale: scale factor between original image and 256x256 image
19
+ pad: pixels of padding in the original image
20
+ """
21
+
22
+ size0 = img.shape
23
+ if size0[0]>=size0[1]:
24
+ h1 = 256
25
+ w1 = 256 * size0[1] // size0[0]
26
+ padh = 0
27
+ padw = 256 - w1
28
+ scale = size0[1] / w1
29
+ else:
30
+ h1 = 256 * size0[0] // size0[1]
31
+ w1 = 256
32
+ padh = 256 - h1
33
+ padw = 0
34
+ scale = size0[0] / h1
35
+ padh1 = padh//2
36
+ padh2 = padh//2 + padh%2
37
+ padw1 = padw//2
38
+ padw2 = padw//2 + padw%2
39
+ img1 = cv2.resize(img, (w1,h1))
40
+ img1 = np.pad(img1, ((padh1, padh2), (padw1, padw2), (0,0)))
41
+ pad = (int(padh1 * scale), int(padw1 * scale))
42
+ img2 = cv2.resize(img1, (128,128))
43
+ return img1, img2, scale, pad
44
+
45
+
46
+ def denormalize_detections(detections, scale, pad):
47
+ """ maps detection coordinates from [0,1] to image coordinates
48
+
49
+ The face and palm detector networks take 256x256 and 128x128 images
50
+ as input. As such the input image is padded and resized to fit the
51
+ size while maintaining the aspect ratio. This function maps the
52
+ normalized coordinates back to the original image coordinates.
53
+
54
+ Inputs:
55
+ detections: nxm tensor. n is the number of detections.
56
+ m is 4+2*k where the first 4 values are the bounding
57
+ box coordinates and k is the number of additional
58
+ keypoints output by the detector.
59
+ scale: scalar that was used to resize the image
60
+ pad: padding in the x and y dimensions
61
+
62
+ """
63
+ detections[:, 0] = detections[:, 0] * scale * 256 - pad[0]
64
+ detections[:, 1] = detections[:, 1] * scale * 256 - pad[1]
65
+ detections[:, 2] = detections[:, 2] * scale * 256 - pad[0]
66
+ detections[:, 3] = detections[:, 3] * scale * 256 - pad[1]
67
+
68
+ detections[:, 4::2] = detections[:, 4::2] * scale * 256 - pad[1]
69
+ detections[:, 5::2] = detections[:, 5::2] * scale * 256 - pad[0]
70
+ return detections
71
+
72
+
73
+
74
+
75
+ class BlazeBlock(nn.Module):
76
+ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, act='relu', skip_proj=False):
77
+ super(BlazeBlock, self).__init__()
78
+
79
+ self.stride = stride
80
+ self.kernel_size = kernel_size
81
+ self.channel_pad = out_channels - in_channels
82
+
83
+ # TFLite uses slightly different padding than PyTorch
84
+ # on the depthwise conv layer when the stride is 2.
85
+ if stride == 2:
86
+ self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
87
+ padding = 0
88
+ else:
89
+ padding = (kernel_size - 1) // 2
90
+
91
+ self.convs = nn.Sequential(
92
+ nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
93
+ kernel_size=kernel_size, stride=stride, padding=padding,
94
+ groups=in_channels, bias=True),
95
+ nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
96
+ kernel_size=1, stride=1, padding=0, bias=True),
97
+ )
98
+
99
+ if skip_proj:
100
+ self.skip_proj = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
101
+ kernel_size=1, stride=1, padding=0, bias=True)
102
+ else:
103
+ self.skip_proj = None
104
+
105
+ if act == 'relu':
106
+ self.act = nn.ReLU(inplace=True)
107
+ elif act == 'prelu':
108
+ self.act = nn.PReLU(out_channels)
109
+ else:
110
+ raise NotImplementedError("unknown activation %s"%act)
111
+
112
+ def forward(self, x):
113
+ if self.stride == 2:
114
+ if self.kernel_size==3:
115
+ h = F.pad(x, (0, 2, 0, 2), "constant", 0)
116
+ else:
117
+ h = F.pad(x, (1, 2, 1, 2), "constant", 0)
118
+ x = self.max_pool(x)
119
+ else:
120
+ h = x
121
+
122
+ if self.skip_proj is not None:
123
+ x = self.skip_proj(x)
124
+ elif self.channel_pad > 0:
125
+ x = F.pad(x, (0, 0, 0, 0, 0, self.channel_pad), "constant", 0)
126
+
127
+
128
+ return self.act(self.convs(h) + x)
129
+
130
+
131
+ class FinalBlazeBlock(nn.Module):
132
+ def __init__(self, channels, kernel_size=3):
133
+ super(FinalBlazeBlock, self).__init__()
134
+
135
+ # TFLite uses slightly different padding than PyTorch
136
+ # on the depthwise conv layer when the stride is 2.
137
+ self.convs = nn.Sequential(
138
+ nn.Conv2d(in_channels=channels, out_channels=channels,
139
+ kernel_size=kernel_size, stride=2, padding=0,
140
+ groups=channels, bias=True),
141
+ nn.Conv2d(in_channels=channels, out_channels=channels,
142
+ kernel_size=1, stride=1, padding=0, bias=True),
143
+ )
144
+
145
+ self.act = nn.ReLU(inplace=True)
146
+
147
+ def forward(self, x):
148
+ h = F.pad(x, (0, 2, 0, 2), "constant", 0)
149
+
150
+ return self.act(self.convs(h))
151
+
152
+
153
+ class BlazeBase(nn.Module):
154
+ """ Base class for MediaPipe models. """
155
+
156
+ def _device(self):
157
+ """Which device (CPU or GPU) is being used by this model?"""
158
+ return self.classifier_8.weight.device
159
+
160
+ def load_weights(self, path):
161
+ self.load_state_dict(torch.load(path))
162
+ self.eval()
163
+
164
+
165
+ class BlazeLandmark(BlazeBase):
166
+ """ Base class for landmark models. """
167
+
168
+ def extract_roi(self, frame, xc, yc, theta, scale):
169
+
170
+ # take points on unit square and transform them according to the roi
171
+ points = torch.tensor([[-1, -1, 1, 1],
172
+ [-1, 1, -1, 1]], device=scale.device).view(1,2,4)
173
+ points = points * scale.view(-1,1,1)/2
174
+ theta = theta.view(-1, 1, 1)
175
+ R = torch.cat((
176
+ torch.cat((torch.cos(theta), -torch.sin(theta)), 2),
177
+ torch.cat((torch.sin(theta), torch.cos(theta)), 2),
178
+ ), 1)
179
+ center = torch.cat((xc.view(-1,1,1), yc.view(-1,1,1)), 1)
180
+ points = R @ points + center
181
+
182
+ # use the points to compute the affine transform that maps
183
+ # these points back to the output square
184
+ res = self.resolution
185
+ points1 = np.array([[0, 0, res-1],
186
+ [0, res-1, 0]], dtype=np.float32).T
187
+ affines = []
188
+ imgs = []
189
+ for i in range(points.shape[0]):
190
+ pts = points[i, :, :3].cpu().numpy().T
191
+ M = cv2.getAffineTransform(pts, points1)
192
+ img = cv2.warpAffine(frame, M, (res,res))#, borderValue=127.5)
193
+ img = torch.tensor(img, device=scale.device)
194
+ imgs.append(img)
195
+ affine = cv2.invertAffineTransform(M).astype('float32')
196
+ affine = torch.tensor(affine, device=scale.device)
197
+ affines.append(affine)
198
+ if imgs:
199
+ imgs = torch.stack(imgs).permute(0,3,1,2).float() / 255.#/ 127.5 - 1.0
200
+ affines = torch.stack(affines)
201
+ else:
202
+ imgs = torch.zeros((0, 3, res, res), device=scale.device)
203
+ affines = torch.zeros((0, 2, 3), device=scale.device)
204
+
205
+ return imgs, affines, points
206
+
207
+ def denormalize_landmarks(self, landmarks, affines):
208
+ landmarks[:,:,:2] *= self.resolution
209
+ for i in range(len(landmarks)):
210
+ landmark, affine = landmarks[i], affines[i]
211
+ landmark = (affine[:,:2] @ landmark[:,:2].T + affine[:,2:]).T
212
+ landmarks[i,:,:2] = landmark
213
+ return landmarks
214
+
215
+
216
+
217
+ class BlazeDetector(BlazeBase):
218
+ """ Base class for detector models.
219
+
220
+ Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and
221
+ https://github.com/hollance/BlazeFace-PyTorch and
222
+ https://github.com/google/mediapipe/
223
+ """
224
+ def load_anchors(self, path):
225
+ self.anchors = torch.tensor(np.load(path), dtype=torch.float32, device=self._device())
226
+ assert(self.anchors.ndimension() == 2)
227
+ assert(self.anchors.shape[0] == self.num_anchors)
228
+ assert(self.anchors.shape[1] == 4)
229
+
230
+ def _preprocess(self, x):
231
+ """Converts the image pixels to the range [-1, 1]."""
232
+ return x.float() / 255.# 127.5 - 1.0
233
+
234
+ def predict_on_image(self, img):
235
+ """Makes a prediction on a single image.
236
+
237
+ Arguments:
238
+ img: a NumPy array of shape (H, W, 3) or a PyTorch tensor of
239
+ shape (3, H, W). The image's height and width should be
240
+ 128 pixels (256 pixels for the back-camera model).
241
+
242
+ Returns:
243
+ A tensor with face detections.
244
+ """
245
+ if isinstance(img, np.ndarray):
246
+ img = torch.from_numpy(img).permute((2, 0, 1))
247
+
248
+ return self.predict_on_batch(img.unsqueeze(0))[0]
249
+
250
+ def predict_on_batch(self, x):
251
+ """Makes a prediction on a batch of images.
252
+
253
+ Arguments:
254
+ x: a NumPy array of shape (b, H, W, 3) or a PyTorch tensor of
255
+ shape (b, 3, H, W). The height and width should be 128 pixels (256 for the back-camera model).
256
+
257
+ Returns:
258
+ A list containing a tensor of face detections for each image in
259
+ the batch. If no faces are found for an image, returns a tensor
260
+ of shape (0, 17).
261
+
262
+ Each face detection is a PyTorch tensor consisting of 17 numbers:
263
+ - ymin, xmin, ymax, xmax
264
+ - x,y-coordinates for the 6 keypoints
265
+ - confidence score
266
+ """
267
+ if isinstance(x, np.ndarray):
268
+ x = torch.from_numpy(x).permute((0, 3, 1, 2))
269
+
270
+ assert x.shape[1] == 3
271
+ assert x.shape[2] == self.y_scale
272
+ assert x.shape[3] == self.x_scale
273
+
274
+ # 1. Preprocess the images into tensors:
275
+ x = x.to(self._device())
276
+ x = self._preprocess(x)
277
+
278
+ # 2. Run the neural network:
279
+ with torch.no_grad():
280
+ out = self.__call__(x)
281
+
282
+ # 3. Postprocess the raw predictions:
283
+ detections = self._tensors_to_detections(out[0], out[1], self.anchors)
284
+
285
+ # 4. Non-maximum suppression to remove overlapping detections:
286
+ filtered_detections = []
287
+ for i in range(len(detections)):
288
+ faces = self._weighted_non_max_suppression(detections[i])
289
+ faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, self.num_coords+1))
290
+ filtered_detections.append(faces)
291
+
292
+ return filtered_detections
293
+
294
+
295
+ def detection2roi(self, detection):
296
+ """ Convert detections from detector to an oriented bounding box.
297
+
298
+ Adapted from:
299
+ # mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt
300
+
301
+ The center and size of the box is calculated from the center
302
+ of the detected box. Rotation is calculated from the vector
303
+ between kp1 and kp2 relative to theta0. The box is scaled
304
+ and shifted by dscale and dy.
305
+
306
+ """
307
+ if self.detection2roi_method == 'box':
308
+ # compute box center and scale
309
+ # use mediapipe/calculators/util/detections_to_rects_calculator.cc
310
+ xc = (detection[:,1] + detection[:,3]) / 2
311
+ yc = (detection[:,0] + detection[:,2]) / 2
312
+ scale = (detection[:,3] - detection[:,1]) # assumes square boxes
313
+
314
+ elif self.detection2roi_method == 'alignment':
315
+ # compute box center and scale
316
+ # use mediapipe/calculators/util/alignment_points_to_rects_calculator.cc
317
+ xc = detection[:,4+2*self.kp1]
318
+ yc = detection[:,4+2*self.kp1+1]
319
+ x1 = detection[:,4+2*self.kp2]
320
+ y1 = detection[:,4+2*self.kp2+1]
321
+ scale = ((xc-x1)**2 + (yc-y1)**2).sqrt() * 2
322
+ else:
323
+ raise NotImplementedError(
324
+ "detection2roi_method [%s] not supported"%self.detection2roi_method)
325
+
326
+ yc += self.dy * scale
327
+ scale *= self.dscale
328
+
329
+ # compute box rotation
330
+ x0 = detection[:,4+2*self.kp1]
331
+ y0 = detection[:,4+2*self.kp1+1]
332
+ x1 = detection[:,4+2*self.kp2]
333
+ y1 = detection[:,4+2*self.kp2+1]
334
+ #theta = np.arctan2(y0-y1, x0-x1) - self.theta0
335
+ theta = torch.atan2(y0-y1, x0-x1) - self.theta0
336
+ return xc, yc, scale, theta
337
+
338
+
339
+ def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors):
340
+ """The output of the neural network is a tensor of shape (b, 896, 16)
341
+ containing the bounding box regressor predictions, as well as a tensor
342
+ of shape (b, 896, 1) with the classification confidences.
343
+
344
+ This function converts these two "raw" tensors into proper detections.
345
+ Returns a list of (num_detections, 17) tensors, one for each image in
346
+ the batch.
347
+
348
+ This is based on the source code from:
349
+ mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc
350
+ mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto
351
+ """
352
+ assert raw_box_tensor.ndimension() == 3
353
+ assert raw_box_tensor.shape[1] == self.num_anchors
354
+ assert raw_box_tensor.shape[2] == self.num_coords
355
+
356
+ assert raw_score_tensor.ndimension() == 3
357
+ assert raw_score_tensor.shape[1] == self.num_anchors
358
+ assert raw_score_tensor.shape[2] == self.num_classes
359
+
360
+ assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0]
361
+
362
+ detection_boxes = self._decode_boxes(raw_box_tensor, anchors)
363
+
364
+ thresh = self.score_clipping_thresh
365
+ raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh)
366
+ detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1)
367
+
368
+ # Note: we stripped off the last dimension from the scores tensor
369
+ # because there is only one class. Now we can simply use a mask
370
+ # to filter out the boxes with too low confidence.
371
+ mask = detection_scores >= self.min_score_thresh
372
+
373
+ # Because each image from the batch can have a different number of
374
+ # detections, process them one at a time using a loop.
375
+ output_detections = []
376
+ for i in range(raw_box_tensor.shape[0]):
377
+ boxes = detection_boxes[i, mask[i]]
378
+ scores = detection_scores[i, mask[i]].unsqueeze(dim=-1)
379
+ output_detections.append(torch.cat((boxes, scores), dim=-1))
380
+
381
+ return output_detections
382
+
383
+ def _decode_boxes(self, raw_boxes, anchors):
384
+ """Converts the predictions into actual coordinates using
385
+ the anchor boxes. Processes the entire batch at once.
386
+ """
387
+ boxes = torch.zeros_like(raw_boxes)
388
+
389
+ x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0]
390
+ y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
391
+
392
+ w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2]
393
+ h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3]
394
+
395
+ boxes[..., 0] = y_center - h / 2. # ymin
396
+ boxes[..., 1] = x_center - w / 2. # xmin
397
+ boxes[..., 2] = y_center + h / 2. # ymax
398
+ boxes[..., 3] = x_center + w / 2. # xmax
399
+
400
+ for k in range(self.num_keypoints):
401
+ offset = 4 + k*2
402
+ keypoint_x = raw_boxes[..., offset ] / self.x_scale * anchors[:, 2] + anchors[:, 0]
403
+ keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
404
+ boxes[..., offset ] = keypoint_x
405
+ boxes[..., offset + 1] = keypoint_y
406
+
407
+ return boxes
408
+
409
+ def _weighted_non_max_suppression(self, detections):
410
+ """The alternative NMS method as mentioned in the BlazeFace paper:
411
+
412
+ "We replace the suppression algorithm with a blending strategy that
413
+ estimates the regression parameters of a bounding box as a weighted
414
+ mean between the overlapping predictions."
415
+
416
+ The original MediaPipe code assigns the score of the most confident
417
+ detection to the weighted detection, but we take the average score
418
+ of the overlapping detections.
419
+
420
+ The input detections should be a Tensor of shape (count, 17).
421
+
422
+ Returns a list of PyTorch tensors, one for each detected face.
423
+
424
+ This is based on the source code from:
425
+ mediapipe/calculators/util/non_max_suppression_calculator.cc
426
+ mediapipe/calculators/util/non_max_suppression_calculator.proto
427
+ """
428
+ if len(detections) == 0: return []
429
+
430
+ output_detections = []
431
+
432
+ # Sort the detections from highest to lowest score.
433
+ remaining = torch.argsort(detections[:, self.num_coords], descending=True)
434
+
435
+ while len(remaining) > 0:
436
+ detection = detections[remaining[0]]
437
+
438
+ # Compute the overlap between the first box and the other
439
+ # remaining boxes. (Note that the other_boxes also include
440
+ # the first_box.)
441
+ first_box = detection[:4]
442
+ other_boxes = detections[remaining, :4]
443
+ ious = overlap_similarity(first_box, other_boxes)
444
+
445
+ # If two detections don't overlap enough, they are considered
446
+ # to be from different faces.
447
+ mask = ious > self.min_suppression_threshold
448
+ overlapping = remaining[mask]
449
+ remaining = remaining[~mask]
450
+
451
+ # Take an average of the coordinates from the overlapping
452
+ # detections, weighted by their confidence scores.
453
+ weighted_detection = detection.clone()
454
+ if len(overlapping) > 1:
455
+ coordinates = detections[overlapping, :self.num_coords]
456
+ scores = detections[overlapping, self.num_coords:self.num_coords+1]
457
+ total_score = scores.sum()
458
+ weighted = (coordinates * scores).sum(dim=0) / total_score
459
+ weighted_detection[:self.num_coords] = weighted
460
+ weighted_detection[self.num_coords] = total_score / len(overlapping)
461
+
462
+ output_detections.append(weighted_detection)
463
+
464
+ return output_detections
465
+
466
+
467
+ # IOU code from https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py
468
+
469
+ def intersect(box_a, box_b):
470
+ """ We resize both tensors to [A,B,2] without new malloc:
471
+ [A,2] -> [A,1,2] -> [A,B,2]
472
+ [B,2] -> [1,B,2] -> [A,B,2]
473
+ Then we compute the area of intersect between box_a and box_b.
474
+ Args:
475
+ box_a: (tensor) bounding boxes, Shape: [A,4].
476
+ box_b: (tensor) bounding boxes, Shape: [B,4].
477
+ Return:
478
+ (tensor) intersection area, Shape: [A,B].
479
+ """
480
+ A = box_a.size(0)
481
+ B = box_b.size(0)
482
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
483
+ box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
484
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
485
+ box_b[:, :2].unsqueeze(0).expand(A, B, 2))
486
+ inter = torch.clamp((max_xy - min_xy), min=0)
487
+ return inter[:, :, 0] * inter[:, :, 1]
488
+
489
+
490
+ def jaccard(box_a, box_b):
491
+ """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
492
+ is simply the intersection over union of two boxes. Here we operate on
493
+ ground truth boxes and default boxes.
494
+ E.g.:
495
+ A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
496
+ Args:
497
+ box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
498
+ box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
499
+ Return:
500
+ jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
501
+ """
502
+ inter = intersect(box_a, box_b)
503
+ area_a = ((box_a[:, 2]-box_a[:, 0]) *
504
+ (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
505
+ area_b = ((box_b[:, 2]-box_b[:, 0]) *
506
+ (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
507
+ union = area_a + area_b - inter
508
+ return inter / union # [A,B]
509
+
510
+
511
+ def overlap_similarity(box, other_boxes):
512
+ """Computes the IOU between a bounding box and set of other boxes."""
513
+ return jaccard(box.unsqueeze(0), other_boxes).squeeze(0)
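A toy check of the IoU helpers above (a minimal sketch, not part of the upload; the boxes and values are illustrative only):

```python
# Sanity check for overlap_similarity on corner-format boxes.
import torch
from blazebase import overlap_similarity

box = torch.tensor([0., 0., 2., 2.])           # ymin, xmin, ymax, xmax
others = torch.tensor([[0., 0., 2., 2.],       # identical box -> IoU 1.0
                       [1., 1., 3., 3.],       # half-shifted  -> IoU 1/7
                       [3., 3., 4., 4.]])      # disjoint      -> IoU 0.0
print(overlap_similarity(box, others))         # tensor([1.0000, 0.1429, 0.0000])
```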
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/blazeface.py ADDED
@@ -0,0 +1,182 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from blazebase import BlazeDetector, BlazeBlock, FinalBlazeBlock
7
+
8
+
9
+ class BlazeFace(BlazeDetector):
10
+ """The BlazeFace face detection model from MediaPipe.
11
+
12
+ The version from MediaPipe is simpler than the one in the paper;
13
+ it does not use the "double" BlazeBlocks.
14
+
15
+ Because we won't be training this model, it doesn't need to have
16
+ batchnorm layers. These have already been "folded" into the conv
17
+ weights by TFLite.
18
+
19
+ The conversion to PyTorch is fairly straightforward, but there are
20
+ some small differences between TFLite and PyTorch in how they handle
21
+ padding on conv layers with stride 2.
22
+
23
+ This version works on batches, while the MediaPipe version can only
24
+ handle a single image at a time.
25
+
26
+ Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and
27
+ https://github.com/hollance/BlazeFace-PyTorch and
28
+ https://github.com/google/mediapipe/
29
+
30
+ """
31
+ def __init__(self, back_model=False):
32
+ super(BlazeFace, self).__init__()
33
+
34
+ # These are the settings from the MediaPipe example graph
35
+ # mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt
36
+ self.num_classes = 1
37
+ self.num_anchors = 896
38
+ self.num_coords = 16
39
+ self.score_clipping_thresh = 100.0
40
+ self.back_model = back_model
41
+ if back_model:
42
+ self.x_scale = 256.0
43
+ self.y_scale = 256.0
44
+ self.h_scale = 256.0
45
+ self.w_scale = 256.0
46
+ self.min_score_thresh = 0.65
47
+ else:
48
+ self.x_scale = 128.0
49
+ self.y_scale = 128.0
50
+ self.h_scale = 128.0
51
+ self.w_scale = 128.0
52
+ self.min_score_thresh = 0.75
53
+ self.min_suppression_threshold = 0.3
54
+ self.num_keypoints = 6
55
+
56
+ # These settings are for converting detections to ROIs which can then
57
+ # be extracted and fed into the landmark network
58
+ # use mediapipe/calculators/util/detections_to_rects_calculator.cc
59
+ self.detection2roi_method = 'box'
60
+ # mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt
61
+ self.kp1 = 1
62
+ self.kp2 = 0
63
+ self.theta0 = 0.
64
+ self.dscale = 1.5
65
+ self.dy = 0.
66
+
67
+ self._define_layers()
68
+
69
+ def _define_layers(self):
70
+ if self.back_model:
71
+ self.backbone = nn.Sequential(
72
+ nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True),
73
+ nn.ReLU(inplace=True),
74
+
75
+ BlazeBlock(24, 24),
76
+ BlazeBlock(24, 24),
77
+ BlazeBlock(24, 24),
78
+ BlazeBlock(24, 24),
79
+ BlazeBlock(24, 24),
80
+ BlazeBlock(24, 24),
81
+ BlazeBlock(24, 24),
82
+ BlazeBlock(24, 24, stride=2),
83
+ BlazeBlock(24, 24),
84
+ BlazeBlock(24, 24),
85
+ BlazeBlock(24, 24),
86
+ BlazeBlock(24, 24),
87
+ BlazeBlock(24, 24),
88
+ BlazeBlock(24, 24),
89
+ BlazeBlock(24, 24),
90
+ BlazeBlock(24, 48, stride=2),
91
+ BlazeBlock(48, 48),
92
+ BlazeBlock(48, 48),
93
+ BlazeBlock(48, 48),
94
+ BlazeBlock(48, 48),
95
+ BlazeBlock(48, 48),
96
+ BlazeBlock(48, 48),
97
+ BlazeBlock(48, 48),
98
+ BlazeBlock(48, 96, stride=2),
99
+ BlazeBlock(96, 96),
100
+ BlazeBlock(96, 96),
101
+ BlazeBlock(96, 96),
102
+ BlazeBlock(96, 96),
103
+ BlazeBlock(96, 96),
104
+ BlazeBlock(96, 96),
105
+ BlazeBlock(96, 96),
106
+ )
107
+ self.final = FinalBlazeBlock(96)
108
+ self.classifier_8 = nn.Conv2d(96, 2, 1, bias=True)
109
+ self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)
110
+
111
+ self.regressor_8 = nn.Conv2d(96, 32, 1, bias=True)
112
+ self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)
113
+ else:
114
+ self.backbone1 = nn.Sequential(
115
+ nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True),
116
+ nn.ReLU(inplace=True),
117
+
118
+ BlazeBlock(24, 24),
119
+ BlazeBlock(24, 28),
120
+ BlazeBlock(28, 32, stride=2),
121
+ BlazeBlock(32, 36),
122
+ BlazeBlock(36, 42),
123
+ BlazeBlock(42, 48, stride=2),
124
+ BlazeBlock(48, 56),
125
+ BlazeBlock(56, 64),
126
+ BlazeBlock(64, 72),
127
+ BlazeBlock(72, 80),
128
+ BlazeBlock(80, 88),
129
+ )
130
+
131
+ self.backbone2 = nn.Sequential(
132
+ BlazeBlock(88, 96, stride=2),
133
+ BlazeBlock(96, 96),
134
+ BlazeBlock(96, 96),
135
+ BlazeBlock(96, 96),
136
+ BlazeBlock(96, 96),
137
+ )
138
+
139
+ self.classifier_8 = nn.Conv2d(88, 2, 1, bias=True)
140
+ self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)
141
+
142
+ self.regressor_8 = nn.Conv2d(88, 32, 1, bias=True)
143
+ self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)
144
+
145
+ def forward(self, x):
146
+ # TFLite uses slightly different padding on the first conv layer
147
+ # than PyTorch, so do it manually.
148
+ x = F.pad(x, (1, 2, 1, 2), "constant", 0)
149
+
150
+ b = x.shape[0] # batch size, needed for reshaping later
151
+
152
+ if self.back_model:
153
+ x = self.backbone(x) # (b, 16, 16, 96)
154
+ h = self.final(x) # (b, 8, 8, 96)
155
+ else:
156
+ x = self.backbone1(x) # (b, 88, 16, 16)
157
+ h = self.backbone2(x) # (b, 96, 8, 8)
158
+
159
+ # Note: Because PyTorch is NCHW but TFLite is NHWC, we need to
160
+ # permute the output from the conv layers before reshaping it.
161
+
162
+ c1 = self.classifier_8(x) # (b, 2, 16, 16)
163
+ c1 = c1.permute(0, 2, 3, 1) # (b, 16, 16, 2)
164
+ c1 = c1.reshape(b, -1, 1) # (b, 512, 1)
165
+
166
+ c2 = self.classifier_16(h) # (b, 6, 8, 8)
167
+ c2 = c2.permute(0, 2, 3, 1) # (b, 8, 8, 6)
168
+ c2 = c2.reshape(b, -1, 1) # (b, 384, 1)
169
+
170
+ c = torch.cat((c1, c2), dim=1) # (b, 896, 1)
171
+
172
+ r1 = self.regressor_8(x) # (b, 32, 16, 16)
173
+ r1 = r1.permute(0, 2, 3, 1) # (b, 16, 16, 32)
174
+ r1 = r1.reshape(b, -1, 16) # (b, 512, 16)
175
+
176
+ r2 = self.regressor_16(h) # (b, 96, 8, 8)
177
+ r2 = r2.permute(0, 2, 3, 1) # (b, 8, 8, 96)
178
+ r2 = r2.reshape(b, -1, 16) # (b, 384, 16)
179
+
180
+ r = torch.cat((r1, r2), dim=1) # (b, 896, 16)
181
+ return [r, c]
182
+
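A quick shape check for the detector head (a sketch with randomly initialized weights, so only the shapes are meaningful): the back-camera model maps a 1x3x256x256 input to 896 anchor predictions.

```python
import torch
from blazeface import BlazeFace

net = BlazeFace(back_model=True).eval()
with torch.no_grad():
    r, c = net(torch.randn(1, 3, 256, 256))   # box regressors, classifier scores
print(r.shape, c.shape)                        # [1, 896, 16] and [1, 896, 1]
```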
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/blazeface_landmark.py ADDED
@@ -0,0 +1,74 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from blazebase import BlazeLandmark, BlazeBlock
7
+
8
+ class BlazeFaceLandmark(BlazeLandmark):
9
+ """The face landmark model from MediaPipe.
10
+
11
+ """
12
+ def __init__(self):
13
+ super(BlazeFaceLandmark, self).__init__()
14
+
15
+ # size of ROIs used for input
16
+ self.resolution = 192
17
+
18
+ self._define_layers()
19
+
20
+ def _define_layers(self):
21
+ self.backbone1 = nn.Sequential(
22
+ nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=2, padding=0, bias=True),
23
+ nn.PReLU(16),
24
+
25
+ BlazeBlock(16, 16, 3, act='prelu'),
26
+ BlazeBlock(16, 16, 3, act='prelu'),
27
+ BlazeBlock(16, 32, 3, 2, act='prelu'),
28
+
29
+ BlazeBlock(32, 32, 3, act='prelu'),
30
+ BlazeBlock(32, 32, 3, act='prelu'),
31
+ BlazeBlock(32, 64, 3, 2, act='prelu'),
32
+
33
+ BlazeBlock(64, 64, 3, act='prelu'),
34
+ BlazeBlock(64, 64, 3, act='prelu'),
35
+ BlazeBlock(64, 128, 3, 2, act='prelu'),
36
+
37
+ BlazeBlock(128, 128, 3, act='prelu'),
38
+ BlazeBlock(128, 128, 3, act='prelu'),
39
+ BlazeBlock(128, 128, 3, 2, act='prelu'),
40
+
41
+ BlazeBlock(128, 128, 3, act='prelu'),
42
+ BlazeBlock(128, 128, 3, act='prelu'),
43
+ )
44
+
45
+
46
+ self.backbone2a = nn.Sequential(
47
+ BlazeBlock(128, 128, 3, 2, act='prelu'),
48
+ BlazeBlock(128, 128, 3, act='prelu'),
49
+ BlazeBlock(128, 128, 3, act='prelu'),
50
+ nn.Conv2d(128, 32, 1, padding=0, bias=True),
51
+ nn.PReLU(32),
52
+ BlazeBlock(32, 32, 3, act='prelu'),
53
+ nn.Conv2d(32, 1404, 3, padding=0, bias=True)
54
+ )
55
+
56
+ self.backbone2b = nn.Sequential(
57
+ BlazeBlock(128, 128, 3, 2, act='prelu'),
58
+ nn.Conv2d(128, 32, 1, padding=0, bias=True),
59
+ nn.PReLU(32),
60
+ BlazeBlock(32, 32, 3, act='prelu'),
61
+ nn.Conv2d(32, 1, 3, padding=0, bias=True)
62
+ )
63
+
64
+ def forward(self, x):
65
+ if x.shape[0] == 0:
66
+ return torch.zeros((0,)), torch.zeros((0, 468, 3))
67
+
68
+ x = F.pad(x, (0, 1, 0, 1), "constant", 0)
69
+
70
+ x = self.backbone1(x)
71
+ landmarks = self.backbone2a(x).view(-1, 468, 3) / 192
72
+ flag = self.backbone2b(x).sigmoid().view(-1)
73
+
74
+ return flag, landmarks
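For reference, a minimal PyTorch-only sketch of chaining the two models with the weights shipped in `models/` (paths assumed relative to `python/`; the QNN demo in `demo_qnn.py` uses `resize_pad` instead of a plain resize to preserve the aspect ratio):

```python
import cv2
import torch
from blazeface import BlazeFace
from blazeface_landmark import BlazeFaceLandmark

detector = BlazeFace(back_model=True)
detector.load_weights("../models/blazefaceback.pth")
detector.load_anchors("../models/anchors_face_back.npy")
landmarker = BlazeFaceLandmark()
landmarker.load_weights("../models/blazeface_landmark.pth")

img = cv2.cvtColor(cv2.imread("coco.jpg"), cv2.COLOR_BGR2RGB)
img256 = cv2.resize(img, (256, 256))               # back model expects 256x256 input
detections = detector.predict_on_image(img256)     # (N, 17) tensor
xc, yc, scale, theta = detector.detection2roi(detections)
rois, affines, boxes = landmarker.extract_roi(img256, xc, yc, theta, scale)
with torch.no_grad():
    flags, norm_landmarks = landmarker(rois)
landmarks = landmarker.denormalize_landmarks(norm_landmarks, affines)
print(flags.shape, landmarks.shape)                # (N,), (N, 468, 3)
```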
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/coco.jpg ADDED

Git LFS Details

  • SHA256: f7d96313cff4ba7511af36d8dbc358dd33b2815ec378680a09b97353cadb2378
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,389 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ from blazebase import resize_pad, denormalize_detections
6
+ from visualization import draw_landmarks, draw_roi, FACE_CONNECTIONS
7
+ import time
8
+ import aidlite
9
+ import os
10
+
11
+ class post_mediapipe_face:
12
+ def __init__(self):
13
+ self.kp1 = 1
14
+ self.kp2 = 0
15
+ self.theta0 = 0.
16
+ self.dscale = 1.5
17
+ self.dy = 0.
18
+ self.x_scale = 256.0
19
+ self.y_scale = 256.0
20
+ self.h_scale = 256.0
21
+ self.w_scale = 256.0
22
+ self.num_keypoints = 6
23
+ self.num_classes = 1
24
+ self.num_anchors = 896
25
+ self.num_coords = 16
26
+ self.min_score_thresh = 0.4 #0.65
27
+ self.score_clipping_thresh = 100.0
28
+ self.min_suppression_threshold = 0.3
29
+ self.resolution = 192
30
+
31
+
32
+ def detection2roi(self,detection):
33
+ xc = (detection[:,1] + detection[:,3]) / 2
34
+ yc = (detection[:,0] + detection[:,2]) / 2
35
+ scale = (detection[:,3] - detection[:,1]) # assumes square boxes
36
+ yc += self.dy * scale
37
+ scale *= self.dscale
38
+ # compute box rotation
39
+ x0 = detection[:,4+2*self.kp1]
40
+ y0 = detection[:,4+2*self.kp1+1]
41
+ x1 = detection[:,4+2*self.kp2]
42
+ y1 = detection[:,4+2*self.kp2+1]
43
+ theta = torch.atan2(y0-y1, x0-x1) - self.theta0
44
+ return xc, yc, scale, theta
45
+
46
+ def _decode_boxes( self,raw_boxes, anchors):
47
+ boxes = torch.zeros_like(raw_boxes)
48
+
49
+ x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0]
50
+ y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
51
+
52
+ w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2]
53
+ h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3]
54
+
55
+ boxes[..., 0] = y_center - h / 2. # ymin
56
+ boxes[..., 1] = x_center - w / 2. # xmin
57
+ boxes[..., 2] = y_center + h / 2. # ymax
58
+ boxes[..., 3] = x_center + w / 2. # xmax
59
+
60
+ for k in range(self.num_keypoints):
61
+ offset = 4 + k*2
62
+ keypoint_x = raw_boxes[..., offset ] / self.x_scale * anchors[:, 2] + anchors[:, 0]
63
+ keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
64
+ boxes[..., offset ] = keypoint_x
65
+ boxes[..., offset + 1] = keypoint_y
66
+ return boxes
67
+
68
+ def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors):
69
+ assert raw_box_tensor.ndimension() == 3
70
+ assert raw_box_tensor.shape[1] == self.num_anchors
71
+ assert raw_box_tensor.shape[2] == self.num_coords
72
+
73
+ assert raw_score_tensor.ndimension() == 3
74
+ assert raw_score_tensor.shape[1] == self.num_anchors
75
+ assert raw_score_tensor.shape[2] == self.num_classes
76
+
77
+ assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0]
78
+
79
+ detection_boxes = self._decode_boxes(raw_box_tensor, anchors)
80
+
81
+ thresh = self.score_clipping_thresh
82
+ raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh)
83
+ detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1)
84
+
85
+ # Note: we stripped off the last dimension from the scores tensor
86
+ # because there is only has one class. Now we can simply use a mask
87
+ # to filter out the boxes with too low confidence.
88
+ mask = detection_scores >= self.min_score_thresh
89
+
90
+ # Because each image from the batch can have a different number of
91
+ # detections, process them one at a time using a loop.
92
+ output_detections = []
93
+ for i in range(raw_box_tensor.shape[0]):
94
+ boxes = detection_boxes[i, mask[i]]
95
+ scores = detection_scores[i, mask[i]].unsqueeze(dim=-1)
96
+ output_detections.append(torch.cat((boxes, scores), dim=-1))
97
+
98
+ return output_detections
99
+
100
+ def extract_roi( self,frame, xc, yc, theta, scale):
101
+ resolution = 192
102
+ # take points on unit square and transform them according to the roi
103
+ points = torch.tensor([[-1, -1, 1, 1],
104
+ [-1, 1, -1, 1]], device=scale.device).view(1,2,4)
105
+ points = points * scale.view(-1,1,1)/2
106
+ theta = theta.view(-1, 1, 1)
107
+ R = torch.cat((
108
+ torch.cat((torch.cos(theta), -torch.sin(theta)), 2),
109
+ torch.cat((torch.sin(theta), torch.cos(theta)), 2),
110
+ ), 1)
111
+ center = torch.cat((xc.view(-1,1,1), yc.view(-1,1,1)), 1)
112
+ points = R @ points + center
113
+
114
+ # use the points to compute the affine transform that maps
115
+ # these points back to the output square
116
+ res = resolution
117
+ points1 = np.array([[0, 0, res-1],
118
+ [0, res-1, 0]], dtype=np.float32).T
119
+ affines = []
120
+ imgs = []
121
+ for i in range(points.shape[0]):
122
+ pts = points[i, :, :3].detach().numpy().T
123
+ M = cv2.getAffineTransform(pts, points1)
124
+ img = cv2.warpAffine(frame, M, (res,res))#, borderValue=127.5)
125
+ img = torch.tensor(img, device=scale.device)
126
+ imgs.append(img)
127
+ affine = cv2.invertAffineTransform(M).astype('float32')
128
+ affine = torch.tensor(affine, device=scale.device)
129
+ affines.append(affine)
130
+ if imgs:
131
+ imgs = torch.stack(imgs).permute(0,3,1,2).float() / 255.#/ 127.5 - 1.0
132
+ affines = torch.stack(affines)
133
+ else:
134
+ imgs = torch.zeros((0, 3, res, res), device=scale.device)
135
+ affines = torch.zeros((0, 2, 3), device=scale.device)
136
+
137
+ return imgs, affines, points
138
+
139
+ def denormalize_landmarks(self, landmarks, affines):
140
+ landmarks[:,:,:2] *= self.resolution
141
+ for i in range(len(landmarks)):
142
+ landmark, affine = landmarks[i], affines[i]
143
+ landmark = (affine[:,:2] @ landmark[:,:2].T + affine[:,2:]).T
144
+ landmarks[i,:,:2] = landmark
145
+ return landmarks
146
+
147
+ def intersect(self,box_a, box_b):
148
+ A = box_a.size(0)
149
+ B = box_b.size(0)
150
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
151
+ box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
152
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
153
+ box_b[:, :2].unsqueeze(0).expand(A, B, 2))
154
+ inter = torch.clamp((max_xy - min_xy), min=0)
155
+ return inter[:, :, 0] * inter[:, :, 1]
156
+
157
+ def jaccard(self,box_a, box_b):
158
+ inter = self.intersect(box_a, box_b)
159
+ area_a = ((box_a[:, 2]-box_a[:, 0]) *
160
+ (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
161
+ area_b = ((box_b[:, 2]-box_b[:, 0]) *
162
+ (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
163
+ union = area_a + area_b - inter
164
+ return inter / union # [A,B]
165
+
166
+
167
+ def overlap_similarity(self,box, other_boxes):
168
+ """Computes the IOU between a bounding box and set of other boxes."""
169
+ return self.jaccard(box.unsqueeze(0), other_boxes).squeeze(0)
170
+
171
+ def _weighted_non_max_suppression(self,detections):
172
+ if len(detections) == 0: return []
173
+ output_detections = []
174
+
175
+ # Sort the detections from highest to lowest score.
176
+ remaining = torch.argsort(detections[:, self.num_coords], descending=True)
177
+
178
+ while len(remaining) > 0:
179
+ detection = detections[remaining[0]]
180
+
181
+ # Compute the overlap between the first box and the other
182
+ # remaining boxes. (Note that the other_boxes also include
183
+ # the first_box.)
184
+ first_box = detection[:4]
185
+ other_boxes = detections[remaining, :4]
186
+ ious = self.overlap_similarity(first_box, other_boxes)
187
+
188
+ # If two detections don't overlap enough, they are considered
189
+ # to be from different faces.
190
+ mask = ious > self.min_suppression_threshold
191
+ overlapping = remaining[mask]
192
+ remaining = remaining[~mask]
193
+
194
+ # Take an average of the coordinates from the overlapping
195
+ # detections, weighted by their confidence scores.
196
+ weighted_detection = detection.clone()
197
+ if len(overlapping) > 1:
198
+ coordinates = detections[overlapping, :self.num_coords]
199
+ scores = detections[overlapping, self.num_coords:self.num_coords+1]
200
+ total_score = scores.sum()
201
+ weighted = (coordinates * scores).sum(dim=0) / total_score
202
+ weighted_detection[:self.num_coords] = weighted
203
+ weighted_detection[self.num_coords] = total_score / len(overlapping)
204
+
205
+ output_detections.append(weighted_detection)
206
+
207
+ return output_detections
208
+
209
+ def draw_detections(img, detections, with_keypoints=True):
210
+ if isinstance(detections, torch.Tensor):
211
+ detections = detections.detach().numpy()
212
+
213
+ if detections.ndim == 1:
214
+ detections = np.expand_dims(detections, axis=0)
215
+
216
+ n_keypoints = detections.shape[1] // 2 - 2
217
+
218
+ for i in range(detections.shape[0]):
219
+ ymin = detections[i, 0]
220
+ xmin = detections[i, 1]
221
+ ymax = detections[i, 2]
222
+ xmax = detections[i, 3]
223
+
224
+ start_point = (int(xmin), int(ymin))
225
+ end_point = (int(xmax), int(ymax))
226
+ img = cv2.rectangle(img, start_point, end_point, (255, 0, 0), 1)
227
+
228
+ if with_keypoints:
229
+ for k in range(n_keypoints):
230
+ kp_x = int(detections[i, 4 + k*2 ])
231
+ kp_y = int(detections[i, 4 + k*2 + 1])
232
+ cv2.circle(img, (kp_x, kp_y), 2, (0, 0, 255), thickness=2)
233
+ return img
234
+
235
+
236
+
237
+ post_process=post_mediapipe_face()
238
+
239
+ class faceDetectionQnn:
240
+ def __init__(self):
241
+ super().__init__()
242
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_faceDetctor_w8a16.qnn216.ctx.bin"))
243
+ if self.model is None:
244
+ print("Create model failed !")
245
+ return
246
+
247
+ self.config = aidlite.Config.create_instance()
248
+ if self.config is None:
249
+ print("Create config failed !")
250
+ return
251
+
252
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
253
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
254
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
255
+ self.config.is_quantify_model = 1
256
+
257
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
258
+ if self.interpreter is None:
259
+ print("build_interpretper_from_model_and_config failed !")
260
+ return
261
+ input_shapes = [[1,3, 256, 256]]
262
+ output_shapes = [[1, 896,16],[1,896,1]]
263
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
264
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
265
+
266
+ if self.interpreter is None:
267
+ print("build_interpretper_from_model_and_config failed !")
268
+ result = self.interpreter.init()
269
+ if result != 0:
270
+ print(f"interpreter init failed !")
271
+ result = self.interpreter.load_model()
272
+ if result != 0:
273
+ print("interpreter load model failed !")
274
+
275
+ print(" model load success!")
276
+
277
+ def __call__(self, input):
278
+ self.interpreter.set_input_tensor(0,input)
279
+ self.interpreter.invoke()
280
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 896,16).copy()
281
+ features_1 = self.interpreter.get_output_tensor(1).reshape(1, 896,1).copy()
282
+ return features_0,features_1
283
+
284
+
285
+ class faceLandmarkQnn:
286
+ def __init__(self):
287
+ super().__init__()
288
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_faceLandmark_w8a16.qnn216.ctx.bin"))
289
+ if self.model is None:
290
+ print("Create model failed !")
291
+ return
292
+
293
+ self.config = aidlite.Config.create_instance()
294
+ if self.config is None:
295
+ print("Create config failed !")
296
+ return
297
+
298
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
299
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
300
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
301
+ self.config.is_quantify_model = 1
302
+
303
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
304
+ if self.interpreter is None:
305
+ print("build_interpretper_from_model_and_config failed !")
306
+ return
307
+ input_shapes = [[1, 3, 192, 192]]
308
+ output_shapes = [[1],[1,468,3]]
309
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
310
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
311
+
312
+ if self.interpreter is None:
313
+ print("build_interpretper_from_model_and_config failed !")
314
+ result = self.interpreter.init()
315
+ if result != 0:
316
+ print(f"interpreter init failed !")
317
+ result = self.interpreter.load_model()
318
+ if result != 0:
319
+ print("interpreter load model failed !")
320
+
321
+ print(" model load success!")
322
+
323
+ def __call__(self, input):
324
+ self.interpreter.set_input_tensor(0,input)
325
+ self.interpreter.invoke()
326
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1).copy()
327
+ features_1 = self.interpreter.get_output_tensor(1).reshape(1,468,3).copy()
328
+ return features_0,features_1
329
+
330
+
331
+
332
+ anchors = torch.tensor(np.load(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/anchors_face_back.npy")), dtype=torch.float32, device='cpu')
333
+ face_detc = faceDetectionQnn()
334
+ face_rec = faceLandmarkQnn()
335
+
336
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"coco.jpg")
337
+
338
+ frame_ct=0
339
+ image = cv2.imread(image_path)
340
+
341
+ frame = np.ascontiguousarray(image[:,:,::-1])
342
+
343
+ img1, img2, scale, pad = resize_pad(frame)
344
+
345
+ input = (img1 / 255).astype(np.float32)
346
+ input = np.transpose(input, (2, 0, 1))
347
+ input = input[np.newaxis, ...]
348
+ t0 = time.time()
349
+ out = face_detc(input)
350
+ use_time = round((time.time() - t0) * 1000, 2)
351
+ print(f"face detection inference_time:{use_time} ms")
352
+ detections = post_process._tensors_to_detections(torch.from_numpy(out[0]), torch.from_numpy(out[1]), anchors)
353
+
354
+ filtered_detections = []
355
+ num_coords = 16
356
+ for i in range(len(detections)):
357
+ faces = post_process._weighted_non_max_suppression(detections[i])
358
+ faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, num_coords+1))
359
+ filtered_detections.append(faces)
360
+
361
+ face_detections = denormalize_detections(filtered_detections[0], scale, pad)
362
+
363
+ xc, yc, scale, theta = post_process.detection2roi(face_detections)
364
+
365
+ img, affine, box = post_process.extract_roi(frame, xc, yc, theta, scale)
366
+ if box.size()[0]!=0:
367
+ t2 = time.time()
368
+ flags, normalized_landmarks = face_rec(img.numpy())
369
+
370
+ use_time = round((time.time() - t2) * 1000, 2)
371
+ print(f"landmark inference_time:{use_time} ms")
372
+
373
+ landmarks = post_process.denormalize_landmarks(torch.from_numpy(normalized_landmarks), affine)
374
+
375
+ for i in range(len(flags)):
376
+ landmark, flag = landmarks[i], flags[i]
377
+ if flag>.4: # 0.5
378
+ draw_landmarks(frame, landmark[:,:2], FACE_CONNECTIONS, size=1)
379
+ else:
380
+ print("not detect face !")
381
+
382
+ draw_roi(frame, box)
383
+ draw_detections(frame, face_detections)
384
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), frame[:,:,::-1])
385
+ face_detc.interpreter.destory()
386
+ face_rec.interpreter.destory()
387
+
388
+
389
+
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/export_jit.py ADDED
@@ -0,0 +1,57 @@
1
+ import torch
2
+ import os
3
+ from typing import Callable, Tuple
4
+ from blazeface import BlazeFace
5
+ from blazeface_landmark import BlazeFaceLandmark
6
+
7
+ class FaceDetector(torch.nn.Module):
8
+ def __init__(
9
+ self,
10
+ detector: Callable[[torch.Tensor], Tuple[torch.Tensor, torch.Tensor]],
11
+ anchors: torch.Tensor,
12
+ ):
13
+ super().__init__()
14
+ self.detector = detector
15
+ self.anchors = anchors
16
+
17
+ def forward(self, image):
18
+ return self.detector(image)
19
+
20
+ back_detector = True
21
+ face_detector = BlazeFace(back_model=back_detector)
22
+ face_detector.load_weights(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/blazefaceback.pth"))
23
+ face_detector.load_anchors(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/anchors_face_back.npy"))
24
+ face_detect = FaceDetector(face_detector,face_detector.anchors)
25
+ num_params = sum(p.numel() for p in face_detect.parameters() if p.requires_grad)
26
+ print(f'Number of face_detect parameters: {num_params}')
27
+
28
+ face_d_in = torch.randn(1, 3, 256, 256,dtype= torch.float32)
29
+ source_model = torch.jit.trace(face_detect,face_d_in)
30
+ source_model.save(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/m_faceDetctor.pt"))
31
+ print("export face detect ok!")
32
+
33
+
34
+
35
+
36
+
37
+ class FaceLandmarkDetector(torch.nn.Module):
38
+ def __init__(
39
+ self,
40
+ detector: Callable[[torch.Tensor], Tuple[torch.Tensor, torch.Tensor]],
41
+ ):
42
+ super().__init__()
43
+ self.detector = detector
44
+
45
+ def forward(self, image):
46
+ return self.detector(image)
47
+
48
+ face_regressor = BlazeFaceLandmark()
49
+ face_regressor.load_weights(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/blazeface_landmark.pth"))
50
+ face_regres = FaceLandmarkDetector(face_regressor)
51
+ num_params = sum(p.numel() for p in face_regres.parameters() if p.requires_grad)
52
+ print(f'Number of face_regres parameters: {num_params}')
53
+
54
+ face_r_in = torch.randn(1, 3, 192, 192,dtype= torch.float32)
55
+ source_model = torch.jit.trace(face_regres, face_r_in)
56
+ source_model.save(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../models/m_faceLandmark.pt"))
57
+ print("export face landmark ok!")
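As a follow-up sanity check (a sketch; the path matches the save call above), the traced detector can be reloaded and run on a random input:

```python
import torch

scripted = torch.jit.load("../models/m_faceDetctor.pt")
with torch.no_grad():
    r, c = scripted(torch.randn(1, 3, 256, 256))
print(r.shape, c.shape)   # expect [1, 896, 16] and [1, 896, 1]
```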
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int16_aidlite/python/visualization.py ADDED
@@ -0,0 +1,125 @@
1
+ import numpy as np
2
+ import cv2
3
+ import torch
4
+
5
+ def draw_detections(img, detections, with_keypoints=True):
6
+ if isinstance(detections, torch.Tensor):
7
+ detections = detections.cpu().numpy()
8
+
9
+ if detections.ndim == 1:
10
+ detections = np.expand_dims(detections, axis=0)
11
+
12
+ n_keypoints = detections.shape[1] // 2 - 2
13
+
14
+ for i in range(detections.shape[0]):
15
+ ymin = detections[i, 0]
16
+ xmin = detections[i, 1]
17
+ ymax = detections[i, 2]
18
+ xmax = detections[i, 3]
19
+
20
+ start_point = (int(xmin), int(ymin))
21
+ end_point = (int(xmax), int(ymax))
22
+ img = cv2.rectangle(img, start_point, end_point, (255, 0, 0), 1)
23
+
24
+ if with_keypoints:
25
+ for k in range(n_keypoints):
26
+ kp_x = int(detections[i, 4 + k*2 ])
27
+ kp_y = int(detections[i, 4 + k*2 + 1])
28
+ cv2.circle(img, (kp_x, kp_y), 2, (0, 0, 255), thickness=2)
29
+ return img
30
+
31
+
32
+ def draw_roi(img, roi):
33
+ for i in range(roi.shape[0]):
34
+ (x1,x2,x3,x4), (y1,y2,y3,y4) = roi[i]
35
+ cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,0), 2)
36
+ cv2.line(img, (int(x1), int(y1)), (int(x3), int(y3)), (0,255,0), 2)
37
+ cv2.line(img, (int(x2), int(y2)), (int(x4), int(y4)), (0,0,0), 2)
38
+ cv2.line(img, (int(x3), int(y3)), (int(x4), int(y4)), (0,0,0), 2)
39
+
40
+
41
+ def draw_landmarks(img, points, connections=[], color=(0, 255, 0), size=2):
42
+ points = points[:,:2]
43
+ for point in points:
44
+ x, y = point
45
+ x, y = int(x), int(y)
46
+ cv2.circle(img, (x, y), size, color, thickness=size)
47
+ for connection in connections:
48
+ x0, y0 = points[connection[0]]
49
+ x1, y1 = points[connection[1]]
50
+ x0, y0 = int(x0), int(y0)
51
+ x1, y1 = int(x1), int(y1)
52
+ cv2.line(img, (x0, y0), (x1, y1), (0,0,0), size)
53
+
54
+
55
+
56
+ # https://github.com/metalwhale/hand_tracking/blob/b2a650d61b4ab917a2367a05b85765b81c0564f2/run.py
57
+ # 8 12 16 20
58
+ # | | | |
59
+ # 7 11 15 19
60
+ # 4 | | | |
61
+ # | 6 10 14 18
62
+ # 3 | | | |
63
+ # | 5---9---13--17
64
+ # 2 \ /
65
+ # \ \ /
66
+ # 1 \ /
67
+ # \ \ /
68
+ # ------0-
69
+ HAND_CONNECTIONS = [
70
+ (0, 1), (1, 2), (2, 3), (3, 4),
71
+ (5, 6), (6, 7), (7, 8),
72
+ (9, 10), (10, 11), (11, 12),
73
+ (13, 14), (14, 15), (15, 16),
74
+ (17, 18), (18, 19), (19, 20),
75
+ (0, 5), (5, 9), (9, 13), (13, 17), (0, 17)
76
+ ]
77
+
78
+ POSE_CONNECTIONS = [
79
+ (0,1), (1,2), (2,3), (3,7),
80
+ (0,4), (4,5), (5,6), (6,8),
81
+ (9,10),
82
+ (11,13), (13,15), (15,17), (17,19), (19,15), (15,21),
83
+ (12,14), (14,16), (16,18), (18,20), (20,16), (16,22),
84
+ (11,12), (12,24), (24,23), (23,11)
85
+ ]
86
+
87
+ # Vertex indices can be found in
88
+ # github.com/google/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualisation.png
89
+ # Found in github.com/google/mediapipe/python/solutions/face_mesh.py
90
+ FACE_CONNECTIONS = [
91
+ # Lips.
92
+ (61, 146), (146, 91), (91, 181), (181, 84), (84, 17),
93
+ (17, 314), (314, 405), (405, 321), (321, 375), (375, 291),
94
+ (61, 185), (185, 40), (40, 39), (39, 37), (37, 0),
95
+ (0, 267), (267, 269), (269, 270), (270, 409), (409, 291),
96
+ (78, 95), (95, 88), (88, 178), (178, 87), (87, 14),
97
+ (14, 317), (317, 402), (402, 318), (318, 324), (324, 308),
98
+ (78, 191), (191, 80), (80, 81), (81, 82), (82, 13),
99
+ (13, 312), (312, 311), (311, 310), (310, 415), (415, 308),
100
+ # Left eye.
101
+ (263, 249), (249, 390), (390, 373), (373, 374), (374, 380),
102
+ (380, 381), (381, 382), (382, 362), (263, 466), (466, 388),
103
+ (388, 387), (387, 386), (386, 385), (385, 384), (384, 398),
104
+ (398, 362),
105
+ # Left eyebrow.
106
+ (276, 283), (283, 282), (282, 295), (295, 285), (300, 293),
107
+ (293, 334), (334, 296), (296, 336),
108
+ # Right eye.
109
+ (33, 7), (7, 163), (163, 144), (144, 145), (145, 153),
110
+ (153, 154), (154, 155), (155, 133), (33, 246), (246, 161),
111
+ (161, 160), (160, 159), (159, 158), (158, 157), (157, 173),
112
+ (173, 133),
113
+ # Right eyebrow.
114
+ (46, 53), (53, 52), (52, 65), (65, 55), (70, 63), (63, 105),
115
+ (105, 66), (66, 107),
116
+ # Face oval.
117
+ (10, 338), (338, 297), (297, 332), (332, 284), (284, 251),
118
+ (251, 389), (389, 356), (356, 454), (454, 323), (323, 361),
119
+ (361, 288), (288, 397), (397, 365), (365, 379), (379, 378),
120
+ (378, 400), (400, 377), (377, 152), (152, 148), (148, 176),
121
+ (176, 149), (149, 150), (150, 136), (136, 172), (172, 58),
122
+ (58, 132), (132, 93), (93, 234), (234, 127), (127, 162),
123
+ (162, 21), (21, 54), (54, 103), (103, 67), (67, 109),
124
+ (109, 10)
125
+ ]
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,63 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: [1x3x256x256],[1x3x192x192]
4
+ - Number of parameters: 0.13M, 0.6M
5
+ - Model size: 0.58MB, 2.32MB
6
+ - Output shape: [1x896x16, 1x896x1], [1, 1x468x3]
7
+
8
+ Source model repository: [MediaPipe-Face-Detection](https://github.com/zmurez/MediaPipePyTorch/)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: INT8
13
+ - Backend: QNN2.16
14
+ - Target Device: FV01 QCS6490
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - Install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
+
48
+ #### c++
49
+ ```bash
50
+ # The cnpy library is required to load the .npy anchor file (run these commands from the default terminal directory)
51
+ git clone https://github.com/rogersce/cnpy.git
52
+ cd cnpy
53
+ mkdir build && cd build
54
+ cmake ..
55
+ make
56
+ sudo make install
57
+
58
+ cd mediapipe-face-detection/model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp
59
+ mkdir build && cd build
60
+ cmake ..
61
+ make
62
+ ./run_test
63
+ ```
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,34 @@
1
+ cmake_minimum_required (VERSION 3.5)
2
+ project("run_test")
3
+
4
+ find_package(OpenCV REQUIRED)
5
+ find_library(CNPY_LIB cnpy REQUIRED)
6
+
7
+ message(STATUS "OpenCV library status:")
9
+ message(STATUS "  version: ${OpenCV_VERSION}")
10
+ message(STATUS "  include: ${OpenCV_INCLUDE_DIRS}")
10
+
11
+ set(CMAKE_CXX_FLAGS "-Wno-error=deprecated-declarations -Wno-deprecated-declarations")
12
+
13
+ include_directories(
14
+ /usr/local/include
15
+ /usr/include/opencv4
16
+ )
17
+
18
+ link_directories(
19
+ /usr/local/lib/
20
+ )
21
+
22
+ file(GLOB SRC_LISTS
23
+ ${CMAKE_CURRENT_SOURCE_DIR}/run_test.cpp
24
+ )
25
+
26
+ add_executable(run_test ${SRC_LISTS})
27
+
28
+ target_link_libraries(run_test
29
+ aidlite
30
+ ${OpenCV_LIBS}
31
+ pthread
32
+ jsoncpp
33
+ ${CNPY_LIB}
34
+ )
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/anchors_float32.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79fa15d63ca4c37eaa953ddb462623e52b07f9af52be5deb7b935b8d3c3d7d94
3
+ size 14464
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/coco.jpg ADDED

Git LFS Details

  • SHA256: f7d96313cff4ba7511af36d8dbc358dd33b2815ec378680a09b97353cadb2378
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/cpp/run_test.cpp ADDED
@@ -0,0 +1,909 @@
1
+ #include <iostream>
2
+ #include <fstream>
3
+ #include <opencv2/opencv.hpp>
4
+ #include <aidlux/aidlite/aidlite.hpp>
5
+ #include <vector>
6
+ #include <numeric>
7
+ #include <cmath>
8
+ #include <jsoncpp/json/json.h>
9
+ #include <tuple>
10
+ #include <algorithm>
11
+ #include <sstream>
12
+ #include <string>
13
+ #include <cassert>
14
+ #include "cnpy.h"
15
+
16
+ using namespace cv;
17
+ using namespace std;
18
+ using namespace Aidlux::Aidlite;
19
+
20
+
21
+ // Face landmark connection indices (from MediaPipe Face Mesh)
22
+ const std::vector<std::pair<int, int>> FACE_CONNECTIONS = {
23
+ {61, 146}, {146, 91}, {91, 181}, {181, 84}, {84, 17},
24
+ {17, 314}, {314, 405}, {405, 321}, {321, 375}, {375, 291},
25
+ {61, 185}, {185, 40}, {40, 39}, {39, 37}, {37, 0},
26
+ {0, 267}, {267, 269}, {269, 270}, {270, 409}, {409, 291},
27
+ {78, 95}, {95, 88}, {88, 178}, {178, 87}, {87, 14},
28
+ {14, 317}, {317, 402}, {402, 318}, {318, 324}, {324, 308},
29
+ {78, 191}, {191, 80}, {80, 81}, {81, 82}, {82, 13},
30
+ {13, 312}, {312, 311}, {311, 310}, {310, 415}, {415, 308},
31
+ {263, 249}, {249, 390}, {390, 373}, {373, 374}, {374, 380},
32
+ {380, 381}, {381, 382}, {382, 362}, {263, 466}, {466, 388},
33
+ {388, 387}, {387, 386}, {386, 385}, {385, 384}, {384, 398},
34
+ {398, 362}, {276, 283}, {283, 282}, {282, 295}, {295, 285},
35
+ {300, 293}, {293, 334}, {334, 296}, {296, 336}, {33, 7},
36
+ {7, 163}, {163, 144}, {144, 145}, {145, 153}, {153, 154},
37
+ {154, 155}, {155, 133}, {33, 246}, {246, 161}, {161, 160},
38
+ {160, 159}, {159, 158}, {158, 157}, {157, 173}, {173, 133},
39
+ {46, 53}, {53, 52}, {52, 65}, {65, 55}, {70, 63}, {63, 105},
40
+ {105, 66}, {66, 107}, {10, 338}, {338, 297}, {297, 332},
41
+ {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454},
42
+ {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365},
43
+ {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152},
44
+ {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136},
45
+ {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234},
46
+ {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103},
47
+ {103, 67}, {67, 109}, {109, 10}
48
+ };
49
+
50
+ struct Args {
51
+ std::string faceDetector_model = "../../models/m_faceDetector_w8a8.qnn216.ctx.bin";
52
+ std::string faceLandmark_model = "../../models/m_faceLandmark_w8a8.qnn216.ctx.bin";
53
+ std::string imgs = "../coco.jpg";
54
+ int invoke_nums = 10;
55
+ std::string model_type = "QNN";
56
+ };
57
+
58
+
59
+ Args parse_args(int argc, char* argv[]) {
60
+ Args args;
61
+ for (int i = 1; i < argc; ++i) {
62
+ std::string arg = argv[i];
63
+ if (arg == "--faceDetector_model" && i + 1 < argc) {
64
+ args.faceDetector_model = argv[++i];
65
+ } else if (arg == "--faceLandmark_model" && i + 1 < argc) {
66
+ args.faceLandmark_model = argv[++i];
67
+ } else if (arg == "--imgs" && i + 1 < argc) {
68
+ args.imgs = argv[++i];
69
+ } else if (arg == "--invoke_nums" && i + 1 < argc) {
70
+ args.invoke_nums = std::stoi(argv[++i]);
71
+ } else if (arg == "--model_type" && i + 1 < argc) {
72
+ args.model_type = argv[++i];
73
+ }
74
+ }
75
+ return args;
76
+ }
77
+
78
+ std::string to_lower(const std::string& str) {
79
+ std::string lower_str = str;
80
+ std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) {
81
+ return std::tolower(c);
82
+ });
83
+ return lower_str;
84
+ }
85
+
86
+ std::vector<std::vector<float>> load_anchors_from_npy(const std::string& path) {
87
+ cnpy::NpyArray arr = cnpy::npy_load(path);
88
+ float* data_ptr = arr.data<float>();
89
+
90
+ size_t num_rows = arr.shape[0]; // 896
91
+ size_t num_cols = arr.shape[1]; // 4
92
+
93
+ std::vector<std::vector<float>> anchors(num_rows, std::vector<float>(num_cols));
94
+ for (size_t i = 0; i < num_rows; ++i) {
95
+ for (size_t j = 0; j < num_cols; ++j) {
96
+ anchors[i][j] = data_ptr[i * num_cols + j];
97
+ }
98
+ }
99
+
100
+ return anchors;
101
+ }
102
+
103
+
104
+ // Draw the face keypoints and connection lines
105
+ void draw_landmarks(
106
+ cv::Mat& img,
107
+ const std::vector<cv::Point2f>& points,
108
+ const std::vector<float>& flags,
109
+ const std::vector<std::pair<int, int>>& connections,
110
+ float threshold = 0.4f,
111
+ cv::Scalar point_color = cv::Scalar(0, 255, 0),
112
+ cv::Scalar line_color = cv::Scalar(0, 0, 0),
113
+ int size = 2)
114
+ {
115
+ // Draw the keypoints
116
+ for (size_t i = 0; i < points.size(); ++i) {
117
+ if (i < flags.size() && flags[i] > threshold) {
118
+ int x = static_cast<int>(points[i].x);
119
+ int y = static_cast<int>(points[i].y);
120
+ cv::circle(img, cv::Point(x, y), size, point_color, size);
121
+ }
122
+ }
123
+
124
+ // Draw the connection lines (both endpoints must be visible)
125
+ for (const auto& conn : connections) {
126
+ int i0 = conn.first;
127
+ int i1 = conn.second;
128
+ if (i0 < points.size() && i1 < points.size() &&
129
+ i0 < flags.size() && i1 < flags.size() &&
130
+ flags[i0] > threshold && flags[i1] > threshold)
131
+ {
132
+ cv::line(img, points[i0], points[i1], line_color, size);
133
+ }
134
+ }
135
+ }
136
+
137
+
138
+ std::tuple<cv::Mat, cv::Mat, float, cv::Point> resize_pad(const cv::Mat& img) {
139
+ int orig_h = img.rows; // 480
140
+ int orig_w = img.cols; // 640
141
+
142
+ // Step 1: resize width to 256, keep aspect ratio
143
+ int w1 = 256;
144
+ int h1 = w1 * orig_h / orig_w; // equivalent to int(256 * h / w)
145
+
146
+ // Step 2: compute padding in height direction
147
+ int padh = 256 - h1;
148
+ int padw = 0;
149
+
150
+ int padh1 = padh / 2;
151
+ int padh2 = padh1 + (padh % 2);
152
+ int padw1 = padw / 2;
153
+ int padw2 = padw1 + (padw % 2);
154
+
155
+ // Step 3: resize to (w1, h1)
156
+ cv::Mat resized;
157
+ cv::resize(img, resized, cv::Size(w1, h1)); // (256, h1)
158
+
159
+ // Step 4: pad to (256, 256)
160
+ cv::Mat padded;
161
+ cv::copyMakeBorder(resized, padded, padh1, padh2, padw1, padw2, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
162
+
163
+ // Step 5: resize padded to 128×128
164
+ cv::Mat resized128;
165
+ cv::resize(padded, resized128, cv::Size(128, 128));
166
+
167
+ // Step 6: compute scale and pad in original image space
168
+ float scale = static_cast<float>(orig_h) / h1; // h / h1
169
+ cv::Point pad_point(static_cast<int>(padh1 * scale), static_cast<int>(padw1 * scale));
170
+
171
+ return std::make_tuple(padded, resized128, scale, pad_point);
172
+ }
173
+
174
+
175
+ // Convert the image to 1xCxHxW layout and normalize by dividing by 255
176
+ std::vector<float> preprocess_image(const cv::Mat& img) {
177
+ int H = img.rows;
178
+ int W = img.cols;
179
+ int C = img.channels(); // should be 3
180
+
181
+ std::vector<float> chw(H * W * C); // CHW
182
+ std::vector<float> nchw(1 * C * H * W); // NCHW
183
+
184
+ // 1. HWC → CHW + normalize (float32 / 255.0)
185
+ for (int h = 0; h < H; ++h) {
186
+ for (int w = 0; w < W; ++w) {
187
+ for (int c = 0; c < C; ++c) {
188
+ // Channel order follows the input Mat (RGB here, since the caller converts BGR to RGB)
189
+ float value = img.at<cv::Vec3b>(h, w)[c] / 255.0f;
190
+ chw[c * H * W + h * W + w] = value;
191
+ }
192
+ }
193
+ }
194
+
195
+ // 2. CHW → NCHW (add batch dimension, actually just copy)
196
+ for (int i = 0; i < C * H * W; ++i) {
197
+ nchw[i] = chw[i];
198
+ }
199
+
200
+ return nchw; // shape: [1, 3, H, W]
201
+ }
202
+
203
+
204
+ // Compute IoU from the first 4 coordinates only (the box corners come first)
205
+ float IoU(const std::vector<float>& box1, const std::vector<float>& box2) {
206
+ float x1 = std::max(box1[0], box2[0]);
207
+ float y1 = std::max(box1[1], box2[1]);
208
+ float x2 = std::min(box1[2], box2[2]);
209
+ float y2 = std::min(box1[3], box2[3]);
210
+
211
+ float inter_area = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
212
+ float box1_area = std::max(0.0f, box1[2] - box1[0]) * std::max(0.0f, box1[3] - box1[1]);
213
+ float box2_area = std::max(0.0f, box2[2] - box2[0]) * std::max(0.0f, box2[3] - box2[1]);
214
+ float union_area = box1_area + box2_area - inter_area;
215
+
216
+ return union_area > 0 ? inter_area / union_area : 0.0f;
217
+ }
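+ // Example: IoU of [0,0,2,2] and [1,1,3,3] is 1 / (4 + 4 - 1) = 1/7 (about 0.14).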
218
+
219
+ std::vector<std::vector<float>> weighted_non_max_suppression(
220
+ std::vector<std::vector<float>>& detections,
221
+ int num_coords = 16,
222
+ float min_suppression_threshold = 0.3f)
223
+ {
224
+ if (detections.empty()) return {};
225
+
226
+ std::vector<int> indices(detections.size());
227
+ std::iota(indices.begin(), indices.end(), 0);
228
+
229
+ // Sort indices by confidence in descending order
230
+ std::sort(indices.begin(), indices.end(), [&](int a, int b) {
231
+ return detections[a][num_coords] > detections[b][num_coords];
232
+ });
233
+
234
+ std::vector<std::vector<float>> output;
235
+
236
+ while (!indices.empty()) {
237
+ int best_idx = indices.front();
238
+ const auto& best_det = detections[best_idx];
239
+ std::vector<int> overlapping = { best_idx };
240
+
241
+ for (size_t i = 1; i < indices.size(); ++i) {
242
+ float iou = IoU(best_det, detections[indices[i]]);
243
+ if (iou > min_suppression_threshold) {
244
+ overlapping.push_back(indices[i]);
245
+ }
246
+ }
247
+
248
+ // Update the remaining indices
249
+ std::vector<int> new_indices;
250
+ for (int idx : indices) {
251
+ if (std::find(overlapping.begin(), overlapping.end(), idx) == overlapping.end()) {
252
+ new_indices.push_back(idx);
253
+ }
254
+ }
255
+ indices = new_indices;
256
+
257
+ // Weighted average: coordinates weighted by confidence
258
+ if (overlapping.size() == 1) {
259
+ output.push_back(best_det);
260
+ } else {
261
+ std::vector<float> weighted(num_coords + 1, 0.0f);
262
+ float total_score = 0.0f;
263
+
264
+ for (int idx : overlapping) {
265
+ float score = detections[idx][num_coords];
266
+ total_score += score;
267
+ for (int k = 0; k < num_coords; ++k) {
268
+ weighted[k] += detections[idx][k] * score;
269
+ }
270
+ }
271
+
272
+ for (int k = 0; k < num_coords; ++k) {
273
+ weighted[k] /= total_score;
274
+ }
275
+ weighted[num_coords] = total_score / overlapping.size(); // use the mean score
276
+
277
+ // std::cout << "Weighted box: ";
278
+ // for (float v : weighted) std::cout << v << " ";
279
+ // std::cout << "\n";
280
+
281
+ output.push_back(weighted);
282
+ }
283
+ }
284
+
285
+ // TODO: currently keeps only the highest-scoring detection; remove this block to return every face
286
+ auto x = output[0];
287
+ output.clear();
288
+ output.push_back(x);
289
+
290
+ return output;
291
+ }
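+ // Each returned detection is num_coords box/keypoint values followed by the
+ // averaged score, still in normalized [0,1] image coordinates.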
292
+
293
+
294
+ std::vector<std::vector<float>> denormalize_detections(
295
+ const std::vector<std::vector<float>>& detections,
296
+ float scale,
297
+ const cv::Point& pad
298
+ ) {
299
+ std::vector<std::vector<float>> result = detections;
300
+
301
+ for (size_t i = 0; i < result.size(); ++i) {
302
+ std::vector<float>& det = result[i];
303
+
304
+ // bbox coords: ymin, xmin, ymax, xmax (pad.x carries the vertical padding, pad.y the horizontal)
305
+ det[0] = det[0] * scale * 256.0f - pad.x; // ymin
306
+ det[1] = det[1] * scale * 256.0f - pad.y; // xmin
307
+ det[2] = det[2] * scale * 256.0f - pad.x; // ymax
308
+ det[3] = det[3] * scale * 256.0f - pad.y; // xmax
309
+
310
+ // keypoints (starting from index 4): format [x, y, x, y, ...]
311
+ for (size_t k = 4; k + 1 < det.size(); k += 2) {
312
+ det[k] = det[k] * scale * 256.0f - pad.y; // x
313
+ det[k + 1] = det[k + 1] * scale * 256.0f - pad.x; // y
314
+ }
315
+ }
316
+
317
+ return result;
318
+ }
319
+
320
+
321
+ void detection2roi(
322
+ const std::vector<std::vector<float>>& detections,
323
+ std::vector<float>& xc,
324
+ std::vector<float>& yc,
325
+ std::vector<float>& scale,
326
+ std::vector<float>& theta,
327
+ int kp1, int kp2, // keypoint indices
328
+ float dy, float dscale, float theta0
329
+ ) {
330
+ size_t N = detections.size();
331
+ xc.resize(N);
332
+ yc.resize(N);
333
+ scale.resize(N);
334
+ theta.resize(N);
335
+
336
+ for (size_t i = 0; i < N; ++i) {
337
+ const std::vector<float>& det = detections[i];
338
+
339
+ float x1 = det[1];
340
+ float x2 = det[3];
341
+ float y1 = det[0];
342
+ float y2 = det[2];
343
+
344
+ float x_center = (x1 + x2) / 2.0f;
345
+ float y_center = (y1 + y2) / 2.0f;
346
+ float box_scale = (x2 - x1); // assumes square box
347
+
348
+ // shift the y center
349
+ y_center += dy * box_scale;
350
+ box_scale *= dscale;
351
+
352
+ // Fetch the positions of the two keypoints
353
+ int base = 4;
354
+ int idx_y0 = base + 2 * kp1;
355
+ int idx_x0 = base + 2 * kp1 + 1;
356
+ int idx_y1 = base + 2 * kp2;
357
+ int idx_x1 = base + 2 * kp2 + 1;
358
+
359
+ float x0 = det[idx_x0];
360
+ float y0 = det[idx_y0];
361
+ float x1_kp = det[idx_x1];
362
+ float y1_kp = det[idx_y1];
363
+
364
+ float angle = std::atan2(y0 - y1_kp, x0 - x1_kp) - theta0;
365
+
366
+ // Write the outputs
367
+ xc[i] = x_center;
368
+ yc[i] = y_center;
369
+ scale[i] = box_scale;
370
+ // TODO: theta is hard-coded below; derive it from the keypoints (the 'angle' computed above) in the general case
371
+ // theta[i] = angle; // use the computed angle when rotation should be applied
372
+ theta[i] = -0.0094;
373
+ }
374
+ }
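+ // Note: dy, dscale and theta0 mirror the MediaPipe face_detection_to_roi settings
+ // used by the Python demo; theta is pinned to a small constant here instead of the
+ // keypoint-derived angle (see the TODO above).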
375
+
376
+
377
+ void extract_roi(
378
+ const cv::Mat& frame,
379
+ const std::vector<float>& xc,
380
+ const std::vector<float>& yc,
381
+ const std::vector<float>& theta,
382
+ const std::vector<float>& scale,
383
+ std::vector<cv::Mat>& cropped_rois,
384
+ std::vector<cv::Mat>& affine_matrices,
385
+ std::vector<std::vector<cv::Point2f>>& roi_boxes, // also returns the ROI corner points
386
+ int resolution = 192
387
+ ) {
388
+ cropped_rois.clear();
389
+ affine_matrices.clear();
390
+ roi_boxes.clear();
391
+
392
+ for (size_t i = 0; i < xc.size(); ++i) {
393
+ float s = scale[i] / 2.0f;
394
+ float cos_t = std::cos(theta[i]);
395
+ float sin_t = std::sin(theta[i]);
396
+
397
+ // The 4 unit-square corners after the ROI transform (same order as the Python version)
398
+ std::vector<cv::Point2f> points(4);
399
+ // [-1, -1]
400
+ points[0].x = xc[i] + (-s * cos_t + s * sin_t);
401
+ points[0].y = yc[i] + (-s * sin_t - s * cos_t);
402
+ // [1, -1]
403
+ points[1].x = xc[i] + ( s * cos_t + s * sin_t);
404
+ points[1].y = yc[i] + ( s * sin_t - s * cos_t);
405
+ // [-1, 1]
406
+ points[2].x = xc[i] + (-s * cos_t - s * sin_t);
407
+ points[2].y = yc[i] + (-s * sin_t + s * cos_t);
408
+ // [1, 1]
409
+ points[3].x = xc[i] + ( s * cos_t - s * sin_t);
410
+ points[3].y = yc[i] + ( s * sin_t + s * cos_t);
411
+
412
+ // Compute the affine transform from the first three points
413
+ std::vector<cv::Point2f> src_pts = { points[0], points[1], points[2] };
414
+ std::vector<cv::Point2f> dst_pts = {
415
+ cv::Point2f(0, 0),
416
+ cv::Point2f(resolution - 1, 0),
417
+ cv::Point2f(0, resolution - 1)
418
+ };
419
+
420
+ cv::Mat M = cv::getAffineTransform(src_pts, dst_pts);
421
+ cv::Mat M_inv;
422
+ cv::invertAffineTransform(M, M_inv);
423
+
424
+ cv::Mat cropped;
425
+ cv::warpAffine(frame, cropped, M, cv::Size(resolution, resolution), cv::INTER_LINEAR, cv::BORDER_CONSTANT, cv::Scalar(127.5, 127.5, 127.5));
426
+ cropped_rois.push_back(cropped);
427
+ affine_matrices.push_back(M_inv);
428
+ roi_boxes.push_back(points); // store the transformed box corners
429
+ }
430
+ }
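+ // Each crop is a resolution x resolution patch taken with warpAffine; pixels that
+ // fall outside the frame are filled with mid-gray (127.5) via BORDER_CONSTANT.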
431
+
432
+ std::vector<float> preprocess_imgs_to_nchw(const std::vector<cv::Mat>& imgs) {
433
+ int N = imgs.size();
434
+ if (N == 0) return {};
435
+
436
+ int H = 192;
437
+ int W = 192;
438
+ int C = 3; // assume 3 channels (BGR)
439
+
440
+ std::vector<float> output;
441
+ output.reserve(N * C * H * W);
442
+
443
+ for (int n = 0; n < N; ++n) {
444
+ cv::Mat img_float;
445
+ imgs[n].convertTo(img_float, CV_32FC3, 1.0 / 255.0); // Normalize to [0,1]
446
+
447
+ // Split channels (HWC → CHW)
448
+ std::vector<cv::Mat> channels(3);
449
+ cv::split(img_float, channels); // channels[0] = B, [1] = G, [2] = R
450
+
451
+ for (int c = 0; c < C; ++c) {
452
+ for (int i = 0; i < H; ++i) {
453
+ for (int j = 0; j < W; ++j) {
454
+ output.push_back(channels[c].at<float>(i, j));
455
+ }
456
+ }
457
+ }
458
+ }
459
+
460
+ return output; // shape: N x C x H x W
461
+ }
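+ // Output layout is N x 3 x 192 x 192 with values scaled to [0,1]; the channel
+ // order is whatever the input Mats use (RGB in this pipeline).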
462
+
463
+ // resolution is typically 192
464
+ std::vector<cv::Point2f> denormalize_landmarks(
465
+ const std::vector<float>& normalized_landmarks,
466
+ const std::vector<cv::Mat>& affines,
467
+ int resolution = 192)
468
+ {
469
+ std::vector<cv::Point2f> output;
470
+
471
+ // Validate the input sizes
472
+ const int num_faces = 1; // assume a single face
473
+ const int num_landmarks = 468;
474
+ if (normalized_landmarks.size() != num_faces * num_landmarks * 3 || affines.size() != num_faces) {
475
+ std::cerr << "Error: Input size mismatch. Expected "
476
+ << num_faces * num_landmarks * 3 << " landmarks and "
477
+ << num_faces << " affine matrices." << std::endl;
478
+ throw std::runtime_error("Input size mismatch");
479
+ }
480
+
481
+ for (int i = 0; i < num_faces; ++i) {
482
+ const cv::Mat& affine = affines[i]; // 2x3 CV_32F
483
+ for (int j = 0; j < num_landmarks; ++j) {
484
+ int idx = i * num_landmarks * 3 + j * 3;
485
+ float x = normalized_landmarks[idx + 0] * resolution;
486
+ float y = normalized_landmarks[idx + 1] * resolution;
487
+ // float z = normalized_landmarks[idx + 2]; // optional
488
+
489
+ // 2x1 input vector
490
+ cv::Mat pt = (cv::Mat_<float>(2, 1) << x, y);
491
+
492
+ // Extract the rotation and translation from the affine matrix
493
+ cv::Mat M2x2 = affine(cv::Rect(0, 0, 2, 2)).clone();
494
+ cv::Mat t2x1 = affine(cv::Rect(2, 0, 1, 2)).clone();
495
+ M2x2.convertTo(M2x2, CV_32F);
496
+ t2x1.convertTo(t2x1, CV_32F);
497
+
498
+ // Apply the inverse affine transform
499
+ cv::Mat out = M2x2 * pt + t2x1;
500
+
501
+ // Store as Point2f
502
+ output.emplace_back(out.at<float>(0, 0), out.at<float>(1, 0));
503
+ }
504
+ }
505
+
506
+ return output; // denormalized landmarks: 468 Point2f
507
+ }
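+ // Only x and y are mapped back to the original image; the z component of each
+ // landmark is ignored here.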
508
+
509
+
510
+ void draw_roi(cv::Mat& img, const std::vector<std::vector<cv::Point2f>>& boxes) {
511
+ for (const auto& roi : boxes) {
512
+ if (roi.size() < 4) continue;
513
+
514
+ const cv::Point2f& p1 = roi[0];
515
+ const cv::Point2f& p2 = roi[1];
516
+ const cv::Point2f& p3 = roi[2];
517
+ const cv::Point2f& p4 = roi[3];
518
+
519
+ cv::line(img, p1, p2, cv::Scalar(0, 0, 0), 2); // black
520
+ cv::line(img, p1, p3, cv::Scalar(0, 255, 0), 2); // green
521
+ cv::line(img, p2, p4, cv::Scalar(0, 0, 0), 2); // black
522
+ cv::line(img, p3, p4, cv::Scalar(0, 0, 0), 2); // black
523
+ }
524
+ }
525
+
526
+
527
+ void draw_detections(cv::Mat& img, const std::vector<std::vector<float>>& detections, bool with_keypoints = true) {
528
+ for (const auto& det : detections) {
529
+ if (det.size() < 4) continue;
530
+
531
+ float ymin = det[0];
532
+ float xmin = det[1];
533
+ float ymax = det[2];
534
+ float xmax = det[3];
535
+
536
+ cv::rectangle(img, cv::Point(int(xmin), int(ymin)), cv::Point(int(xmax), int(ymax)), cv::Scalar(255, 0, 0), 1);
537
+
538
+ if (with_keypoints && det.size() > 4) {
539
+ int n_keypoints = (det.size() - 4) / 2;
540
+ for (int k = 0; k < n_keypoints; ++k) {
541
+ int kp_x = int(det[4 + k * 2]);
542
+ int kp_y = int(det[4 + k * 2 + 1]);
543
+ cv::circle(img, cv::Point(kp_x, kp_y), 2, cv::Scalar(0, 0, 255), 2);
544
+ }
545
+ }
546
+ }
547
+ }
548
+
549
+
550
+ std::vector<std::vector<float>> loadAnchors(const std::string& filename) {
551
+ std::ifstream in(filename);
552
+ std::vector<std::vector<float>> anchors;
553
+
554
+ if (!in.is_open()) {
555
+ std::cerr << "Failed to open file: " << filename << std::endl;
556
+ return anchors;
557
+ }
558
+
559
+ std::string line;
560
+ while (std::getline(in, line)) {
561
+ std::istringstream ss(line);
562
+ std::vector<float> anchor;
563
+ float value;
564
+ while (ss >> value) {
565
+ anchor.push_back(value);
566
+ }
567
+ if (!anchor.empty()) {
568
+ anchors.push_back(anchor);
569
+ }
570
+ }
571
+
572
+ in.close();
573
+ return anchors;
574
+ }
575
+
576
+ // Sigmoid function
577
+ float sigmoid(float x) {
578
+ return 1.0f / (1.0f + std::exp(-x));
579
+ }
580
+
581
+ // Clamp function
582
+ float clamp(float x, float min_val, float max_val) {
583
+ return std::max(min_val, std::min(max_val, x));
584
+ }
585
+
586
+ // shape: [batch, num_anchors, num_coords] => [batch][anchor][coord]
587
+ std::vector<std::vector<std::vector<float>>> decode_boxes(
588
+ const std::vector<float>& raw_boxes,
589
+ const std::vector<std::vector<float>>& anchors,
590
+ int batch, int num_anchors, int num_coords,
591
+ float x_scale, float y_scale, float w_scale, float h_scale,
592
+ int num_keypoints)
593
+ {
594
+ std::vector<std::vector<std::vector<float>>> decoded_boxes(batch,
595
+ std::vector<std::vector<float>>(num_anchors, std::vector<float>(num_coords, 0)));
596
+
597
+ for (int b = 0; b < batch; ++b) {
598
+ for (int i = 0; i < num_anchors; ++i) {
599
+ int base = b * num_anchors * num_coords + i * num_coords;
600
+
601
+ float x_center = raw_boxes[base + 0] / x_scale * anchors[i][2] + anchors[i][0];
602
+ float y_center = raw_boxes[base + 1] / y_scale * anchors[i][3] + anchors[i][1];
603
+ float w = raw_boxes[base + 2] / w_scale * anchors[i][2];
604
+ float h = raw_boxes[base + 3] / h_scale * anchors[i][3];
605
+
606
+ decoded_boxes[b][i][0] = y_center - h / 2.0f; // ymin
607
+ decoded_boxes[b][i][1] = x_center - w / 2.0f; // xmin
608
+ decoded_boxes[b][i][2] = y_center + h / 2.0f; // ymax
609
+ decoded_boxes[b][i][3] = x_center + w / 2.0f; // xmax
610
+
611
+ for (int k = 0; k < num_keypoints; ++k) {
612
+ int offset = 4 + k * 2;
613
+ float keypoint_x = raw_boxes[base + offset] / x_scale * anchors[i][2] + anchors[i][0];
614
+ float keypoint_y = raw_boxes[base + offset + 1] / y_scale * anchors[i][3] + anchors[i][1];
615
+ decoded_boxes[b][i][offset] = keypoint_x;
616
+ decoded_boxes[b][i][offset + 1] = keypoint_y;
617
+ }
618
+ }
619
+ }
620
+
621
+ return decoded_boxes;
622
+ }
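+ // For the back-model detector all four scales are 256, so the raw offsets are
+ // divided by 256 and then scaled by the anchor width/height before being added
+ // to the anchor center.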
623
+
624
+ std::vector<std::vector<std::vector<float>>> tensors_to_detections(
625
+ const std::vector<float>& raw_box_tensor,
626
+ const std::vector<float>& raw_score_tensor,
627
+ const std::vector<std::vector<float>>& anchors,
628
+ int batch, int num_anchors, int num_coords, int num_classes, int num_keypoints,
629
+ float x_scale, float y_scale, float w_scale, float h_scale,
630
+ float score_clipping_thresh, float min_score_thresh)
631
+ {
632
+ assert(raw_box_tensor.size() == batch * num_anchors * num_coords);
633
+ assert(raw_score_tensor.size() == batch * num_anchors * num_classes);
634
+ assert(anchors.size() == size_t(num_anchors));
635
+
636
+ auto detection_boxes = decode_boxes(
637
+ raw_box_tensor, anchors, batch, num_anchors, num_coords,
638
+ x_scale, y_scale, w_scale, h_scale, num_keypoints);
639
+
640
+ std::vector<std::vector<std::vector<float>>> output_detections;
641
+
642
+ for (int b = 0; b < batch; ++b) {
643
+ std::vector<std::vector<float>> detections;
644
+
645
+ for (int i = 0; i < num_anchors; ++i) {
646
+ int score_index = b * num_anchors * num_classes + i * num_classes;
647
+
648
+ // Single-class case: take class 0
649
+ float score_raw = raw_score_tensor[score_index];
650
+ float score = sigmoid(clamp(score_raw, -score_clipping_thresh, score_clipping_thresh));
651
+
652
+ if (score >= min_score_thresh) {
653
+ std::vector<float> det = detection_boxes[b][i]; // shape [num_coords]
654
+ det.push_back(score); // append the confidence
655
+ detections.push_back(det); // shape [num_coords+1]
656
+ }
657
+ }
658
+
659
+ output_detections.push_back(detections); // one vector per batch item
660
+ }
661
+
662
+ return output_detections;
663
+ }
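+ // The call in invoke() below uses batch=1, 896 anchors, 16 coords, 1 class,
+ // 6 keypoints, scales of 256, score clipping at 100 and a minimum score
+ // threshold of 0.4, matching the Python demo's post-processing settings.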
664
+
665
+
666
+ int invoke(const Args& args) {
667
+ std::cout << "Start main ... ... Model Path: " << args.faceDetector_model << "\n"
668
+ << args.faceLandmark_model << "\n"
669
+ << "Image Path: " << args.imgs << "\n"
670
+ << "Inference Nums: " << args.invoke_nums << "\n"
671
+ << "Model Type: " << args.model_type << "\n";
672
+ // =============================================================faceDetector_model start
673
+ Model* model1 = Model::create_instance(args.faceDetector_model);
674
+ if(model1 == nullptr){
675
+ printf("Create model1 failed !\n");
676
+ return EXIT_FAILURE;
677
+ }
678
+ Config* config1 = Config::create_instance();
679
+ if(config1 == nullptr){
680
+ printf("Create config1 failed !\n");
681
+ return EXIT_FAILURE;
682
+ }
683
+ config1->implement_type = ImplementType::TYPE_LOCAL;
684
+ std::string model_type_lower1 = to_lower(args.model_type);
685
+ if (model_type_lower1 == "qnn"){
686
+ config1->framework_type = FrameworkType::TYPE_QNN;
687
+ } else if (model_type_lower1 == "snpe2" || model_type_lower1 == "snpe") {
688
+ config1->framework_type = FrameworkType::TYPE_SNPE2;
689
+ }
690
+ config1->accelerate_type = AccelerateType::TYPE_DSP;
691
+ config1->is_quantify_model = 1;
692
+
693
+ std::vector<std::vector<uint32_t>> input_shapes1 = {{1,3,256,256}};
694
+ std::vector<std::vector<uint32_t>> output_shapes1 = {{1,896,16},{1,896,1}};
695
+ model1->set_model_properties(input_shapes1, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes1, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
696
+ std::unique_ptr<Interpreter> fast_interpreter1 = InterpreterBuilder::build_interpretper_from_model_and_config(model1, config1);
697
+ if(fast_interpreter1 == nullptr){
698
+ printf("build_interpretper_from_model_and_config failed !\n");
699
+ return EXIT_FAILURE;
700
+ }
701
+ int result = fast_interpreter1->init();
702
+ if(result != EXIT_SUCCESS){
703
+ printf("interpreter->init() failed !\n");
704
+ return EXIT_FAILURE;
705
+ }
706
+ // load model
707
+ result = fast_interpreter1->load_model();
708
+ if(result != EXIT_SUCCESS){
709
+ printf("interpreter->load_model() failed !\n");
710
+ return EXIT_FAILURE;
711
+ }
712
+ printf("detect model load success!\n");
713
+ // =============================================================faceDetector_model over
714
+
715
+ // =============================================================faceLandmark_model start
716
+ Model* model2 = Model::create_instance(args.faceLandmark_model);
717
+ if(model2 == nullptr){
718
+ printf("Create model2 failed !\n");
719
+ return EXIT_FAILURE;
720
+ }
721
+ Config* config2 = Config::create_instance();
722
+ if(config2 == nullptr){
723
+ printf("Create config2 failed !\n");
724
+ return EXIT_FAILURE;
725
+ }
726
+ config2->implement_type = ImplementType::TYPE_LOCAL;
727
+ std::string model_type_lower2 = to_lower(args.model_type);
728
+ if (model_type_lower2 == "qnn"){
729
+ config2->framework_type = FrameworkType::TYPE_QNN;
730
+ } else if (model_type_lower2 == "snpe2" || model_type_lower2 == "snpe") {
731
+ config2->framework_type = FrameworkType::TYPE_SNPE2;
732
+ }
733
+ config2->accelerate_type = AccelerateType::TYPE_DSP;
734
+ config2->is_quantify_model = 1;
735
+
736
+ std::vector<std::vector<uint32_t>> input_shapes2 = {{1,3,192,192}};
737
+ std::vector<std::vector<uint32_t>> output_shapes2 = {{1},{1,468,3}};
738
+ model2->set_model_properties(input_shapes2, Aidlux::Aidlite::DataType::TYPE_FLOAT32, output_shapes2, Aidlux::Aidlite::DataType::TYPE_FLOAT32);
739
+ std::unique_ptr<Interpreter> fast_interpreter2 = InterpreterBuilder::build_interpretper_from_model_and_config(model2, config2);
740
+ if(fast_interpreter2 == nullptr){
741
+ printf("build_interpretper_from_model_and_config2 failed !\n");
742
+ return EXIT_FAILURE;
743
+ }
744
+ result = fast_interpreter2->init();
745
+ if(result != EXIT_SUCCESS){
746
+ printf("interpreter2->init() failed !\n");
747
+ return EXIT_FAILURE;
748
+ }
749
+ // load model
750
+ result = fast_interpreter2->load_model();
751
+ if(result != EXIT_SUCCESS){
752
+ printf("interpreter2->load_model() failed !\n");
753
+ return EXIT_FAILURE;
754
+ }
755
+ printf("detect model2 load success!\n");
756
+ // =============================================================faceLandmark_model over
757
+
758
+
759
+ auto anchors = load_anchors_from_npy("../anchors_float32.npy");
760
+ cv::Mat frame = cv::imread(args.imgs);
761
+ if (frame.empty()) {
762
+ printf("detect image load failed!\n");
763
+ return 1;
764
+ }
765
+ // printf("img_src cols: %d, img_src rows: %d\n", frame.cols, frame.rows);
766
+ cv::Mat input_data;
767
+ cv::Mat frame_clone1 = frame.clone();
768
+ cv::cvtColor(frame_clone1, frame_clone1, cv::COLOR_BGR2RGB);
769
+ cv::Mat frame_clone = frame.clone();
770
+
771
+
772
+ cv::Mat img1, img2;
773
+ float scale;
774
+ cv::Point pad;
775
+ std::tie(img1, img2, scale, pad) = resize_pad(frame_clone1);
776
+ std::vector<float> input_tensor = preprocess_image(img1);
777
+
778
+ float *outdata0 = nullptr;
779
+ float *outdata1 = nullptr;
780
+ std::vector<float> invoke_time;
781
+ for (int i = 0; i < args.invoke_nums; ++i) {
782
+ result = fast_interpreter1->set_input_tensor(0, input_tensor.data());
783
+ if(result != EXIT_SUCCESS){
784
+ printf("interpreter->set_input_tensor() failed !\n");
785
+ return EXIT_FAILURE;
786
+ }
787
+ auto t1 = std::chrono::high_resolution_clock::now();
788
+ result = fast_interpreter1->invoke();
789
+ auto t2 = std::chrono::high_resolution_clock::now();
790
+ std::chrono::duration<double> cost_time = t2 - t1;
791
+ invoke_time.push_back(cost_time.count() * 1000);
792
+ if(result != EXIT_SUCCESS){
793
+ printf("interpreter->invoke() failed !\n");
794
+ return EXIT_FAILURE;
795
+ }
796
+ uint32_t out_data_0 = 0;
797
+ result = fast_interpreter1->get_output_tensor(0, (void**)&outdata0, &out_data_0);
798
+ if(result != EXIT_SUCCESS){
799
+ printf("interpreter1->get_output_tensor() 0 failed !\n");
800
+ return EXIT_FAILURE;
801
+ }
802
+
803
+ uint32_t out_data_1 = 0;
804
+ result = fast_interpreter1->get_output_tensor(1, (void**)&outdata1, &out_data_1);
805
+ if(result != EXIT_SUCCESS){
806
+ printf("interpreter1->get_output_tensor() 1 failed !\n");
807
+ return EXIT_FAILURE;
808
+ }
809
+
810
+ }
811
+
812
+ std::vector<float> tensor_1_896_16(outdata0, outdata0 + 896*16);
813
+ std::vector<float> tensor_1_896_1(outdata1, outdata1 + 896*1);
814
+
815
+ std::vector<std::vector<std::vector<float>>> detections = tensors_to_detections(
816
+ tensor_1_896_16, tensor_1_896_1, anchors,
817
+ 1, 896, 16, 1, 6,
818
+ 256.0f, 256.0f, 256.0f, 256.0f,
819
+ 100.0f, 0.4f);
820
+
821
+
822
+ std::vector<std::vector<std::vector<float>>> filtered_detections;
823
+ for (size_t i = 0; i < detections.size(); ++i) {
824
+ std::vector<std::vector<float>>& dets = detections[i];
825
+ std::vector<std::vector<float>> faces = weighted_non_max_suppression(dets);
826
+ filtered_detections.push_back(faces);
827
+ }
828
+
829
+
830
+ // std::cout << "filtered_detections size: " << filtered_detections.size() << "\n";
831
+ // std::cout << "scale: " << scale << ", pad: (" << pad.x << ", " << pad.y << ")\n";
832
+ std::vector<std::vector<float>> face_detections = denormalize_detections(filtered_detections[0], scale, pad);
833
+
834
+ // std::cout << "face_detections size: " << face_detections.size() << "\n";
835
+ std::vector<float> xc, yc, scales, theta;
836
+ int kp1 = 0, kp2 = 1; // keypoint indices
837
+ float dy = 0.0f; // set per the model definition
838
+ float dscale = 1.5f; // scale factor
839
+ float theta0 = 0.0f; // reference angle
840
+
841
+ detection2roi(face_detections, xc, yc, scales, theta, kp1, kp2, dy, dscale, theta0);
842
+ std::vector<cv::Mat> rois;
843
+ std::vector<cv::Mat> affines;
844
+ std::vector<std::vector<cv::Point2f>> boxes;
845
+
846
+ extract_roi(frame_clone1, xc, yc, theta, scales, rois, affines, boxes);
847
+ if (!boxes.empty()) {
848
+ std::cout << "Detected " << boxes.size() << " faces.\n";
849
+ // A face was detected; continue processing boxes[0] ...
850
+ std::vector<float> input_tensor = preprocess_imgs_to_nchw(rois);
851
+
852
+ float *outdata1_0 = nullptr;
853
+ float *outdata1_1 = nullptr;
854
+
855
+ result = fast_interpreter2->set_input_tensor(0, input_tensor.data());
856
+ if(result != EXIT_SUCCESS){
857
+ printf("interpreter2->set_input_tensor() failed !\n");
858
+ return EXIT_FAILURE;
859
+ }
860
+ auto t1 = std::chrono::high_resolution_clock::now();
861
+ result = fast_interpreter2->invoke();
862
+ auto t2 = std::chrono::high_resolution_clock::now();
863
+ std::chrono::duration<double> cost_time = t2 - t1;
864
+ invoke_time.push_back(cost_time.count() * 1000);
865
+ if(result != EXIT_SUCCESS){
866
+ printf("interpreter2->invoke() failed !\n");
867
+ return EXIT_FAILURE;
868
+ }
869
+ uint32_t out_data_1_0 = 0;
870
+ result = fast_interpreter2->get_output_tensor(0, (void**)&outdata1_0, &out_data_1_0);
871
+ if(result != EXIT_SUCCESS){
872
+ printf("interpreter2->get_output_tensor() 0 failed !\n");
873
+ return EXIT_FAILURE;
874
+ }
875
+
876
+ uint32_t out_data_1_1 = 0;
877
+ result = fast_interpreter2->get_output_tensor(1, (void**)&outdata1_1, &out_data_1_1);
878
+ if(result != EXIT_SUCCESS){
879
+ printf("interpreter2->get_output_tensor() 1 failed !\n");
880
+ return EXIT_FAILURE;
881
+ }
882
+
883
+ std::vector<float> flags(outdata1_0, outdata1_0 + 1);
884
+ std::vector<float> normalized_landmarks(outdata1_1, outdata1_1 + 468*3);
885
+
886
+ std::vector<cv::Point2f> landmarks = denormalize_landmarks(normalized_landmarks, affines);
887
+ draw_landmarks(frame_clone1, landmarks, flags, FACE_CONNECTIONS);
888
+ } else {
889
+ std::cout << "not detect face!" << std::endl;
890
+ }
891
+
892
+
893
+ draw_roi(frame_clone1, boxes);
894
+ draw_detections(frame_clone1, face_detections);
895
+ cv::cvtColor(frame_clone1, frame_clone1, cv::COLOR_RGB2BGR);
896
+ cv::imwrite("vis_result.jpg", frame_clone1);
897
+
898
+
899
+ fast_interpreter1->destory();
900
+ fast_interpreter2->destory();
901
+ return 0;
902
+
903
+ }
904
+
905
+
906
+ int main(int argc, char* argv[]) {
907
+ Args args = parse_args(argc, argv);
908
+ return invoke(args);
909
+ }
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/models/m_faceDetector_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ccf2e3a2ee4ff2adf15ea7b00b453bb1a0a183ebd764e8542eb9d56182191d
3
+ size 720424
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/models/m_faceLandmark_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355785d3eeb5a26ad29e3b128d803d3f20b443e01bed3249ff4013ac57d634b4
3
+ size 1068128
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/anchors_face_back.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10bb2fb93ab54ca426d6c750bfc3aad685028a16dcf231357d03694f261fd95
3
+ size 28800
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/anchors_float32.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79fa15d63ca4c37eaa953ddb462623e52b07f9af52be5deb7b935b8d3c3d7d94
3
+ size 14464
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/blazebase.py ADDED
@@ -0,0 +1,513 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ def resize_pad(img):
9
+ """ resize and pad images to be input to the detectors
10
+
11
+ The face and palm detector networks take 256x256 and 128x128 images
12
+ as input. As such the input image is padded and resized to fit the
13
+ size while maintaining the aspect ratio.
14
+
15
+ Returns:
16
+ img1: 256x256
17
+ img2: 128x128
18
+ scale: scale factor between original image and 256x256 image
19
+ pad: pixels of padding in the original image
20
+ """
21
+
22
+ size0 = img.shape
23
+ if size0[0]>=size0[1]:
24
+ h1 = 256
25
+ w1 = 256 * size0[1] // size0[0]
26
+ padh = 0
27
+ padw = 256 - w1
28
+ scale = size0[1] / w1
29
+ else:
30
+ h1 = 256 * size0[0] // size0[1]
31
+ w1 = 256
32
+ padh = 256 - h1
33
+ padw = 0
34
+ scale = size0[0] / h1
35
+ padh1 = padh//2
36
+ padh2 = padh//2 + padh%2
37
+ padw1 = padw//2
38
+ padw2 = padw//2 + padw%2
39
+ img1 = cv2.resize(img, (w1,h1))
40
+ img1 = np.pad(img1, ((padh1, padh2), (padw1, padw2), (0,0)))
41
+ pad = (int(padh1 * scale), int(padw1 * scale))
42
+ img2 = cv2.resize(img1, (128,128))
43
+ return img1, img2, scale, pad
44
+
45
+
46
+ def denormalize_detections(detections, scale, pad):
47
+ """ maps detection coordinates from [0,1] to image coordinates
48
+
49
+ The face and palm detector networks take 256x256 and 128x128 images
50
+ as input. As such the input image is padded and resized to fit the
51
+ size while maintaining the aspect ratio. This function maps the
52
+ normalized coordinates back to the original image coordinates.
53
+
54
+ Inputs:
55
+ detections: nxm tensor. n is the number of detections.
56
+ m is 4+2*k where the first 4 values are the bounding
57
+ box coordinates and k is the number of additional
58
+ keypoints output by the detector.
59
+ scale: scalar that was used to resize the image
60
+ pad: padding in the x and y dimensions
61
+
62
+ """
63
+ detections[:, 0] = detections[:, 0] * scale * 256 - pad[0]
64
+ detections[:, 1] = detections[:, 1] * scale * 256 - pad[1]
65
+ detections[:, 2] = detections[:, 2] * scale * 256 - pad[0]
66
+ detections[:, 3] = detections[:, 3] * scale * 256 - pad[1]
67
+
68
+ detections[:, 4::2] = detections[:, 4::2] * scale * 256 - pad[1]
69
+ detections[:, 5::2] = detections[:, 5::2] * scale * 256 - pad[0]
70
+ return detections
71
+
72
+
73
+
74
+
75
+ class BlazeBlock(nn.Module):
76
+ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, act='relu', skip_proj=False):
77
+ super(BlazeBlock, self).__init__()
78
+
79
+ self.stride = stride
80
+ self.kernel_size = kernel_size
81
+ self.channel_pad = out_channels - in_channels
82
+
83
+ # TFLite uses slightly different padding than PyTorch
84
+ # on the depthwise conv layer when the stride is 2.
85
+ if stride == 2:
86
+ self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
87
+ padding = 0
88
+ else:
89
+ padding = (kernel_size - 1) // 2
90
+
91
+ self.convs = nn.Sequential(
92
+ nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
93
+ kernel_size=kernel_size, stride=stride, padding=padding,
94
+ groups=in_channels, bias=True),
95
+ nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
96
+ kernel_size=1, stride=1, padding=0, bias=True),
97
+ )
98
+
99
+ if skip_proj:
100
+ self.skip_proj = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
101
+ kernel_size=1, stride=1, padding=0, bias=True)
102
+ else:
103
+ self.skip_proj = None
104
+
105
+ if act == 'relu':
106
+ self.act = nn.ReLU(inplace=True)
107
+ elif act == 'prelu':
108
+ self.act = nn.PReLU(out_channels)
109
+ else:
110
+ raise NotImplementedError("unknown activation %s"%act)
111
+
112
+ def forward(self, x):
113
+ if self.stride == 2:
114
+ if self.kernel_size==3:
115
+ h = F.pad(x, (0, 2, 0, 2), "constant", 0)
116
+ else:
117
+ h = F.pad(x, (1, 2, 1, 2), "constant", 0)
118
+ x = self.max_pool(x)
119
+ else:
120
+ h = x
121
+
122
+ if self.skip_proj is not None:
123
+ x = self.skip_proj(x)
124
+ elif self.channel_pad > 0:
125
+ x = F.pad(x, (0, 0, 0, 0, 0, self.channel_pad), "constant", 0)
126
+
127
+
128
+ return self.act(self.convs(h) + x)
129
+
130
+
131
+ class FinalBlazeBlock(nn.Module):
132
+ def __init__(self, channels, kernel_size=3):
133
+ super(FinalBlazeBlock, self).__init__()
134
+
135
+ # TFLite uses slightly different padding than PyTorch
136
+ # on the depthwise conv layer when the stride is 2.
137
+ self.convs = nn.Sequential(
138
+ nn.Conv2d(in_channels=channels, out_channels=channels,
139
+ kernel_size=kernel_size, stride=2, padding=0,
140
+ groups=channels, bias=True),
141
+ nn.Conv2d(in_channels=channels, out_channels=channels,
142
+ kernel_size=1, stride=1, padding=0, bias=True),
143
+ )
144
+
145
+ self.act = nn.ReLU(inplace=True)
146
+
147
+ def forward(self, x):
148
+ h = F.pad(x, (0, 2, 0, 2), "constant", 0)
149
+
150
+ return self.act(self.convs(h))
151
+
152
+
153
+ class BlazeBase(nn.Module):
154
+ """ Base class for media pipe models. """
155
+
156
+ def _device(self):
157
+ """Which device (CPU or GPU) is being used by this model?"""
158
+ return self.classifier_8.weight.device
159
+
160
+ def load_weights(self, path):
161
+ self.load_state_dict(torch.load(path))
162
+ self.eval()
163
+
164
+
165
+ class BlazeLandmark(BlazeBase):
166
+ """ Base class for landmark models. """
167
+
168
+ def extract_roi(self, frame, xc, yc, theta, scale):
169
+
170
+ # take points on unit square and transform them according to the roi
171
+ points = torch.tensor([[-1, -1, 1, 1],
172
+ [-1, 1, -1, 1]], device=scale.device).view(1,2,4)
173
+ points = points * scale.view(-1,1,1)/2
174
+ theta = theta.view(-1, 1, 1)
175
+ R = torch.cat((
176
+ torch.cat((torch.cos(theta), -torch.sin(theta)), 2),
177
+ torch.cat((torch.sin(theta), torch.cos(theta)), 2),
178
+ ), 1)
179
+ center = torch.cat((xc.view(-1,1,1), yc.view(-1,1,1)), 1)
180
+ points = R @ points + center
181
+
182
+ # use the points to compute the affine transform that maps
183
+ # these points back to the output square
184
+ res = self.resolution
185
+ points1 = np.array([[0, 0, res-1],
186
+ [0, res-1, 0]], dtype=np.float32).T
187
+ affines = []
188
+ imgs = []
189
+ for i in range(points.shape[0]):
190
+ pts = points[i, :, :3].cpu().numpy().T
191
+ M = cv2.getAffineTransform(pts, points1)
192
+ img = cv2.warpAffine(frame, M, (res,res))#, borderValue=127.5)
193
+ img = torch.tensor(img, device=scale.device)
194
+ imgs.append(img)
195
+ affine = cv2.invertAffineTransform(M).astype('float32')
196
+ affine = torch.tensor(affine, device=scale.device)
197
+ affines.append(affine)
198
+ if imgs:
199
+ imgs = torch.stack(imgs).permute(0,3,1,2).float() / 255.#/ 127.5 - 1.0
200
+ affines = torch.stack(affines)
201
+ else:
202
+ imgs = torch.zeros((0, 3, res, res), device=scale.device)
203
+ affines = torch.zeros((0, 2, 3), device=scale.device)
204
+
205
+ return imgs, affines, points
206
+
207
+ def denormalize_landmarks(self, landmarks, affines):
208
+ landmarks[:,:,:2] *= self.resolution
209
+ for i in range(len(landmarks)):
210
+ landmark, affine = landmarks[i], affines[i]
211
+ landmark = (affine[:,:2] @ landmark[:,:2].T + affine[:,2:]).T
212
+ landmarks[i,:,:2] = landmark
213
+ return landmarks
214
+
215
+
216
+
217
+ class BlazeDetector(BlazeBase):
218
+ """ Base class for detector models.
219
+
220
+ Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and
221
+ https://github.com/hollance/BlazeFace-PyTorch and
222
+ https://github.com/google/mediapipe/
223
+ """
224
+ def load_anchors(self, path):
225
+ self.anchors = torch.tensor(np.load(path), dtype=torch.float32, device=self._device())
226
+ assert(self.anchors.ndimension() == 2)
227
+ assert(self.anchors.shape[0] == self.num_anchors)
228
+ assert(self.anchors.shape[1] == 4)
229
+
230
+ def _preprocess(self, x):
231
+ """Converts the image pixels to the range [-1, 1]."""
232
+ return x.float() / 255.# 127.5 - 1.0
233
+
234
+ def predict_on_image(self, img):
235
+ """Makes a prediction on a single image.
236
+
237
+ Arguments:
238
+ img: a NumPy array of shape (H, W, 3) or a PyTorch tensor of
239
+ shape (3, H, W). The image's height and width should be
240
+ 128 pixels.
241
+
242
+ Returns:
243
+ A tensor with face detections.
244
+ """
245
+ if isinstance(img, np.ndarray):
246
+ img = torch.from_numpy(img).permute((2, 0, 1))
247
+
248
+ return self.predict_on_batch(img.unsqueeze(0))[0]
249
+
250
+ def predict_on_batch(self, x):
251
+ """Makes a prediction on a batch of images.
252
+
253
+ Arguments:
254
+ x: a NumPy array of shape (b, H, W, 3) or a PyTorch tensor of
255
+ shape (b, 3, H, W). The height and width should be 128 pixels.
256
+
257
+ Returns:
258
+ A list containing a tensor of face detections for each image in
259
+ the batch. If no faces are found for an image, returns a tensor
260
+ of shape (0, 17).
261
+
262
+ Each face detection is a PyTorch tensor consisting of 17 numbers:
263
+ - ymin, xmin, ymax, xmax
264
+ - x,y-coordinates for the 6 keypoints
265
+ - confidence score
266
+ """
267
+ if isinstance(x, np.ndarray):
268
+ x = torch.from_numpy(x).permute((0, 3, 1, 2))
269
+
270
+ assert x.shape[1] == 3
271
+ assert x.shape[2] == self.y_scale
272
+ assert x.shape[3] == self.x_scale
273
+
274
+ # 1. Preprocess the images into tensors:
275
+ x = x.to(self._device())
276
+ x = self._preprocess(x)
277
+
278
+ # 2. Run the neural network:
279
+ with torch.no_grad():
280
+ out = self.__call__(x)
281
+
282
+ # 3. Postprocess the raw predictions:
283
+ detections = self._tensors_to_detections(out[0], out[1], self.anchors)
284
+
285
+ # 4. Non-maximum suppression to remove overlapping detections:
286
+ filtered_detections = []
287
+ for i in range(len(detections)):
288
+ faces = self._weighted_non_max_suppression(detections[i])
289
+ faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, self.num_coords+1))
290
+ filtered_detections.append(faces)
291
+
292
+ return filtered_detections
293
+
294
+
295
+ def detection2roi(self, detection):
296
+ """ Convert detections from detector to an oriented bounding box.
297
+
298
+ Adapted from:
299
+ # mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt
300
+
301
+ The center and size of the box is calculated from the center
302
+ of the detected box. Rotation is calculated from the vector
303
+ between kp1 and kp2 relative to theta0. The box is scaled
304
+ and shifted by dscale and dy.
305
+
306
+ """
307
+ if self.detection2roi_method == 'box':
308
+ # compute box center and scale
309
+ # use mediapipe/calculators/util/detections_to_rects_calculator.cc
310
+ xc = (detection[:,1] + detection[:,3]) / 2
311
+ yc = (detection[:,0] + detection[:,2]) / 2
312
+ scale = (detection[:,3] - detection[:,1]) # assumes square boxes
313
+
314
+ elif self.detection2roi_method == 'alignment':
315
+ # compute box center and scale
316
+ # use mediapipe/calculators/util/alignment_points_to_rects_calculator.cc
317
+ xc = detection[:,4+2*self.kp1]
318
+ yc = detection[:,4+2*self.kp1+1]
319
+ x1 = detection[:,4+2*self.kp2]
320
+ y1 = detection[:,4+2*self.kp2+1]
321
+ scale = ((xc-x1)**2 + (yc-y1)**2).sqrt() * 2
322
+ else:
323
+ raise NotImplementedError(
324
+ "detection2roi_method [%s] not supported"%self.detection2roi_method)
325
+
326
+ yc += self.dy * scale
327
+ scale *= self.dscale
328
+
329
+ # compute box rotation
330
+ x0 = detection[:,4+2*self.kp1]
331
+ y0 = detection[:,4+2*self.kp1+1]
332
+ x1 = detection[:,4+2*self.kp2]
333
+ y1 = detection[:,4+2*self.kp2+1]
334
+ #theta = np.arctan2(y0-y1, x0-x1) - self.theta0
335
+ theta = torch.atan2(y0-y1, x0-x1) - self.theta0
336
+ return xc, yc, scale, theta
337
+
338
+
339
+ def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors):
340
+ """The output of the neural network is a tensor of shape (b, 896, 16)
341
+ containing the bounding box regressor predictions, as well as a tensor
342
+ of shape (b, 896, 1) with the classification confidences.
343
+
344
+ This function converts these two "raw" tensors into proper detections.
345
+ Returns a list of (num_detections, 17) tensors, one for each image in
346
+ the batch.
347
+
348
+ This is based on the source code from:
349
+ mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc
350
+ mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.proto
351
+ """
352
+ assert raw_box_tensor.ndimension() == 3
353
+ assert raw_box_tensor.shape[1] == self.num_anchors
354
+ assert raw_box_tensor.shape[2] == self.num_coords
355
+
356
+ assert raw_score_tensor.ndimension() == 3
357
+ assert raw_score_tensor.shape[1] == self.num_anchors
358
+ assert raw_score_tensor.shape[2] == self.num_classes
359
+
360
+ assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0]
361
+
362
+ detection_boxes = self._decode_boxes(raw_box_tensor, anchors)
363
+
364
+ thresh = self.score_clipping_thresh
365
+ raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh)
366
+ detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1)
367
+
368
+ # Note: we stripped off the last dimension from the scores tensor
369
+ # because there is only one class. Now we can simply use a mask
370
+ # to filter out the boxes with too low confidence.
371
+ mask = detection_scores >= self.min_score_thresh
372
+
373
+ # Because each image from the batch can have a different number of
374
+ # detections, process them one at a time using a loop.
375
+ output_detections = []
376
+ for i in range(raw_box_tensor.shape[0]):
377
+ boxes = detection_boxes[i, mask[i]]
378
+ scores = detection_scores[i, mask[i]].unsqueeze(dim=-1)
379
+ output_detections.append(torch.cat((boxes, scores), dim=-1))
380
+
381
+ return output_detections
382
+
383
+ def _decode_boxes(self, raw_boxes, anchors):
384
+ """Converts the predictions into actual coordinates using
385
+ the anchor boxes. Processes the entire batch at once.
386
+ """
387
+ boxes = torch.zeros_like(raw_boxes)
388
+
389
+ x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0]
390
+ y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
391
+
392
+ w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2]
393
+ h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3]
394
+
395
+ boxes[..., 0] = y_center - h / 2. # ymin
396
+ boxes[..., 1] = x_center - w / 2. # xmin
397
+ boxes[..., 2] = y_center + h / 2. # ymax
398
+ boxes[..., 3] = x_center + w / 2. # xmax
399
+
400
+ for k in range(self.num_keypoints):
401
+ offset = 4 + k*2
402
+ keypoint_x = raw_boxes[..., offset ] / self.x_scale * anchors[:, 2] + anchors[:, 0]
403
+ keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
404
+ boxes[..., offset ] = keypoint_x
405
+ boxes[..., offset + 1] = keypoint_y
406
+
407
+ return boxes
408
+
409
+ def _weighted_non_max_suppression(self, detections):
410
+ """The alternative NMS method as mentioned in the BlazeFace paper:
411
+
412
+ "We replace the suppression algorithm with a blending strategy that
413
+ estimates the regression parameters of a bounding box as a weighted
414
+ mean between the overlapping predictions."
415
+
416
+ The original MediaPipe code assigns the score of the most confident
417
+ detection to the weighted detection, but we take the average score
418
+ of the overlapping detections.
419
+
420
+ The input detections should be a Tensor of shape (count, 17).
421
+
422
+ Returns a list of PyTorch tensors, one for each detected face.
423
+
424
+ This is based on the source code from:
425
+ mediapipe/calculators/util/non_max_suppression_calculator.cc
426
+ mediapipe/calculators/util/non_max_suppression_calculator.proto
427
+ """
428
+ if len(detections) == 0: return []
429
+
430
+ output_detections = []
431
+
432
+ # Sort the detections from highest to lowest score.
433
+ remaining = torch.argsort(detections[:, self.num_coords], descending=True)
434
+
435
+ while len(remaining) > 0:
436
+ detection = detections[remaining[0]]
437
+
438
+ # Compute the overlap between the first box and the other
439
+ # remaining boxes. (Note that the other_boxes also include
440
+ # the first_box.)
441
+ first_box = detection[:4]
442
+ other_boxes = detections[remaining, :4]
443
+ ious = overlap_similarity(first_box, other_boxes)
444
+
445
+ # If two detections don't overlap enough, they are considered
446
+ # to be from different faces.
447
+ mask = ious > self.min_suppression_threshold
448
+ overlapping = remaining[mask]
449
+ remaining = remaining[~mask]
450
+
451
+ # Take an average of the coordinates from the overlapping
452
+ # detections, weighted by their confidence scores.
453
+ weighted_detection = detection.clone()
454
+ if len(overlapping) > 1:
455
+ coordinates = detections[overlapping, :self.num_coords]
456
+ scores = detections[overlapping, self.num_coords:self.num_coords+1]
457
+ total_score = scores.sum()
458
+ weighted = (coordinates * scores).sum(dim=0) / total_score
459
+ weighted_detection[:self.num_coords] = weighted
460
+ weighted_detection[self.num_coords] = total_score / len(overlapping)
461
+
462
+ output_detections.append(weighted_detection)
463
+
464
+ return output_detections
465
+
466
+
467
+ # IOU code from https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py
468
+
469
+ def intersect(box_a, box_b):
470
+ """ We resize both tensors to [A,B,2] without new malloc:
471
+ [A,2] -> [A,1,2] -> [A,B,2]
472
+ [B,2] -> [1,B,2] -> [A,B,2]
473
+ Then we compute the area of intersect between box_a and box_b.
474
+ Args:
475
+ box_a: (tensor) bounding boxes, Shape: [A,4].
476
+ box_b: (tensor) bounding boxes, Shape: [B,4].
477
+ Return:
478
+ (tensor) intersection area, Shape: [A,B].
479
+ """
480
+ A = box_a.size(0)
481
+ B = box_b.size(0)
482
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
483
+ box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
484
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
485
+ box_b[:, :2].unsqueeze(0).expand(A, B, 2))
486
+ inter = torch.clamp((max_xy - min_xy), min=0)
487
+ return inter[:, :, 0] * inter[:, :, 1]
488
+
489
+
490
+ def jaccard(box_a, box_b):
491
+ """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
492
+ is simply the intersection over union of two boxes. Here we operate on
493
+ ground truth boxes and default boxes.
494
+ E.g.:
495
+ A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
496
+ Args:
497
+ box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
498
+ box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
499
+ Return:
500
+ jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
501
+ """
502
+ inter = intersect(box_a, box_b)
503
+ area_a = ((box_a[:, 2]-box_a[:, 0]) *
504
+ (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
505
+ area_b = ((box_b[:, 2]-box_b[:, 0]) *
506
+ (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
507
+ union = area_a + area_b - inter
508
+ return inter / union # [A,B]
509
+
510
+
511
+ def overlap_similarity(box, other_boxes):
512
+ """Computes the IOU between a bounding box and set of other boxes."""
513
+ return jaccard(box.unsqueeze(0), other_boxes).squeeze(0)
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/blazeface.py ADDED
@@ -0,0 +1,182 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from blazebase import BlazeDetector, BlazeBlock, FinalBlazeBlock
7
+
8
+
9
+ class BlazeFace(BlazeDetector):
10
+ """The BlazeFace face detection model from MediaPipe.
11
+
12
+ The version from MediaPipe is simpler than the one in the paper;
13
+ it does not use the "double" BlazeBlocks.
14
+
15
+ Because we won't be training this model, it doesn't need to have
16
+ batchnorm layers. These have already been "folded" into the conv
17
+ weights by TFLite.
18
+
19
+ The conversion to PyTorch is fairly straightforward, but there are
20
+ some small differences between TFLite and PyTorch in how they handle
21
+ padding on conv layers with stride 2.
22
+
23
+ This version works on batches, while the MediaPipe version can only
24
+ handle a single image at a time.
25
+
26
+ Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and
27
+ https://github.com/hollance/BlazeFace-PyTorch and
28
+ https://github.com/google/mediapipe/
29
+
30
+ """
31
+ def __init__(self, back_model=False):
32
+ super(BlazeFace, self).__init__()
33
+
34
+ # These are the settings from the MediaPipe example graph
35
+ # mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt
36
+ self.num_classes = 1
37
+ self.num_anchors = 896
38
+ self.num_coords = 16
39
+ self.score_clipping_thresh = 100.0
40
+ self.back_model = back_model
41
+ if back_model:
42
+ self.x_scale = 256.0
43
+ self.y_scale = 256.0
44
+ self.h_scale = 256.0
45
+ self.w_scale = 256.0
46
+ self.min_score_thresh = 0.65
47
+ else:
48
+ self.x_scale = 128.0
49
+ self.y_scale = 128.0
50
+ self.h_scale = 128.0
51
+ self.w_scale = 128.0
52
+ self.min_score_thresh = 0.75
53
+ self.min_suppression_threshold = 0.3
54
+ self.num_keypoints = 6
55
+
56
+ # These settings are for converting detections to ROIs which can then
57
+ # be extracted and feed into the landmark network
58
+ # use mediapipe/calculators/util/detections_to_rects_calculator.cc
59
+ self.detection2roi_method = 'box'
60
+ # mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt
61
+ self.kp1 = 1
62
+ self.kp2 = 0
63
+ self.theta0 = 0.
64
+ self.dscale = 1.5
65
+ self.dy = 0.
66
+
67
+ self._define_layers()
68
+
69
+ def _define_layers(self):
70
+ if self.back_model:
71
+ self.backbone = nn.Sequential(
72
+ nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True),
73
+ nn.ReLU(inplace=True),
74
+
75
+ BlazeBlock(24, 24),
76
+ BlazeBlock(24, 24),
77
+ BlazeBlock(24, 24),
78
+ BlazeBlock(24, 24),
79
+ BlazeBlock(24, 24),
80
+ BlazeBlock(24, 24),
81
+ BlazeBlock(24, 24),
82
+ BlazeBlock(24, 24, stride=2),
83
+ BlazeBlock(24, 24),
84
+ BlazeBlock(24, 24),
85
+ BlazeBlock(24, 24),
86
+ BlazeBlock(24, 24),
87
+ BlazeBlock(24, 24),
88
+ BlazeBlock(24, 24),
89
+ BlazeBlock(24, 24),
90
+ BlazeBlock(24, 48, stride=2),
91
+ BlazeBlock(48, 48),
92
+ BlazeBlock(48, 48),
93
+ BlazeBlock(48, 48),
94
+ BlazeBlock(48, 48),
95
+ BlazeBlock(48, 48),
96
+ BlazeBlock(48, 48),
97
+ BlazeBlock(48, 48),
98
+ BlazeBlock(48, 96, stride=2),
99
+ BlazeBlock(96, 96),
100
+ BlazeBlock(96, 96),
101
+ BlazeBlock(96, 96),
102
+ BlazeBlock(96, 96),
103
+ BlazeBlock(96, 96),
104
+ BlazeBlock(96, 96),
105
+ BlazeBlock(96, 96),
106
+ )
107
+ self.final = FinalBlazeBlock(96)
108
+ self.classifier_8 = nn.Conv2d(96, 2, 1, bias=True)
109
+ self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)
110
+
111
+ self.regressor_8 = nn.Conv2d(96, 32, 1, bias=True)
112
+ self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)
113
+ else:
114
+ self.backbone1 = nn.Sequential(
115
+ nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=2, padding=0, bias=True),
116
+ nn.ReLU(inplace=True),
117
+
118
+ BlazeBlock(24, 24),
119
+ BlazeBlock(24, 28),
120
+ BlazeBlock(28, 32, stride=2),
121
+ BlazeBlock(32, 36),
122
+ BlazeBlock(36, 42),
123
+ BlazeBlock(42, 48, stride=2),
124
+ BlazeBlock(48, 56),
125
+ BlazeBlock(56, 64),
126
+ BlazeBlock(64, 72),
127
+ BlazeBlock(72, 80),
128
+ BlazeBlock(80, 88),
129
+ )
130
+
131
+ self.backbone2 = nn.Sequential(
132
+ BlazeBlock(88, 96, stride=2),
133
+ BlazeBlock(96, 96),
134
+ BlazeBlock(96, 96),
135
+ BlazeBlock(96, 96),
136
+ BlazeBlock(96, 96),
137
+ )
138
+
139
+ self.classifier_8 = nn.Conv2d(88, 2, 1, bias=True)
140
+ self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)
141
+
142
+ self.regressor_8 = nn.Conv2d(88, 32, 1, bias=True)
143
+ self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)
144
+
145
+ def forward(self, x):
146
+ # TFLite uses slightly different padding on the first conv layer
147
+ # than PyTorch, so do it manually.
148
+ x = F.pad(x, (1, 2, 1, 2), "constant", 0)
149
+
150
+ b = x.shape[0] # batch size, needed for reshaping later
151
+
152
+ if self.back_model:
153
+ x = self.backbone(x) # (b, 16, 16, 96)
154
+ h = self.final(x) # (b, 8, 8, 96)
155
+ else:
156
+ x = self.backbone1(x) # (b, 88, 16, 16)
157
+ h = self.backbone2(x) # (b, 96, 8, 8)
158
+
159
+ # Note: Because PyTorch is NCHW but TFLite is NHWC, we need to
160
+ # permute the output from the conv layers before reshaping it.
161
+
162
+ c1 = self.classifier_8(x) # (b, 2, 16, 16)
163
+ c1 = c1.permute(0, 2, 3, 1) # (b, 16, 16, 2)
164
+ c1 = c1.reshape(b, -1, 1) # (b, 512, 1)
165
+
166
+ c2 = self.classifier_16(h) # (b, 6, 8, 8)
167
+ c2 = c2.permute(0, 2, 3, 1) # (b, 8, 8, 6)
168
+ c2 = c2.reshape(b, -1, 1) # (b, 384, 1)
169
+
170
+ c = torch.cat((c1, c2), dim=1) # (b, 896, 1)
171
+
172
+ r1 = self.regressor_8(x) # (b, 32, 16, 16)
173
+ r1 = r1.permute(0, 2, 3, 1) # (b, 16, 16, 32)
174
+ r1 = r1.reshape(b, -1, 16) # (b, 512, 16)
175
+
176
+ r2 = self.regressor_16(h) # (b, 96, 8, 8)
177
+ r2 = r2.permute(0, 2, 3, 1) # (b, 8, 8, 96)
178
+ r2 = r2.reshape(b, -1, 16) # (b, 384, 16)
179
+
180
+ r = torch.cat((r1, r2), dim=1) # (b, 896, 16)
181
+ return [r, c]
182
+
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/blazeface_landmark.py ADDED
@@ -0,0 +1,74 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from blazebase import BlazeLandmark, BlazeBlock
7
+
8
+ class BlazeFaceLandmark(BlazeLandmark):
9
+ """The face landmark model from MediaPipe.
10
+
11
+ """
12
+ def __init__(self):
13
+ super(BlazeFaceLandmark, self).__init__()
14
+
15
+ # size of ROIs used for input
16
+ self.resolution = 192
17
+
18
+ self._define_layers()
19
+
20
+ def _define_layers(self):
21
+ self.backbone1 = nn.Sequential(
22
+ nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=2, padding=0, bias=True),
23
+ nn.PReLU(16),
24
+
25
+ BlazeBlock(16, 16, 3, act='prelu'),
26
+ BlazeBlock(16, 16, 3, act='prelu'),
27
+ BlazeBlock(16, 32, 3, 2, act='prelu'),
28
+
29
+ BlazeBlock(32, 32, 3, act='prelu'),
30
+ BlazeBlock(32, 32, 3, act='prelu'),
31
+ BlazeBlock(32, 64, 3, 2, act='prelu'),
32
+
33
+ BlazeBlock(64, 64, 3, act='prelu'),
34
+ BlazeBlock(64, 64, 3, act='prelu'),
35
+ BlazeBlock(64, 128, 3, 2, act='prelu'),
36
+
37
+ BlazeBlock(128, 128, 3, act='prelu'),
38
+ BlazeBlock(128, 128, 3, act='prelu'),
39
+ BlazeBlock(128, 128, 3, 2, act='prelu'),
40
+
41
+ BlazeBlock(128, 128, 3, act='prelu'),
42
+ BlazeBlock(128, 128, 3, act='prelu'),
43
+ )
44
+
45
+
46
+ self.backbone2a = nn.Sequential(
47
+ BlazeBlock(128, 128, 3, 2, act='prelu'),
48
+ BlazeBlock(128, 128, 3, act='prelu'),
49
+ BlazeBlock(128, 128, 3, act='prelu'),
50
+ nn.Conv2d(128, 32, 1, padding=0, bias=True),
51
+ nn.PReLU(32),
52
+ BlazeBlock(32, 32, 3, act='prelu'),
53
+ nn.Conv2d(32, 1404, 3, padding=0, bias=True)
54
+ )
55
+
56
+ self.backbone2b = nn.Sequential(
57
+ BlazeBlock(128, 128, 3, 2, act='prelu'),
58
+ nn.Conv2d(128, 32, 1, padding=0, bias=True),
59
+ nn.PReLU(32),
60
+ BlazeBlock(32, 32, 3, act='prelu'),
61
+ nn.Conv2d(32, 1, 3, padding=0, bias=True)
62
+ )
63
+
64
+ def forward(self, x):
65
+ if x.shape[0] == 0:
66
+ return torch.zeros((0,)), torch.zeros((0, 468, 3))
67
+
68
+ x = F.pad(x, (0, 1, 0, 1), "constant", 0)
69
+
70
+ x = self.backbone1(x)
71
+ landmarks = self.backbone2a(x).view(-1, 468, 3) / 192
72
+ flag = self.backbone2b(x).sigmoid().view(-1)
73
+
74
+ return flag, landmarks
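+ # Output note: `flag` is the per-crop face-presence confidence in [0, 1], and
+ # `landmarks` are 468 (x, y, z) points normalized to the 192x192 crop (the raw
+ # outputs are divided by 192 above).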
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/coco.jpg ADDED

Git LFS Details

  • SHA256: f7d96313cff4ba7511af36d8dbc358dd33b2815ec378680a09b97353cadb2378
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/demo_qnn.py ADDED
@@ -0,0 +1,424 @@
1
+ import numpy as np
2
+ import torch
3
+ import cv2
4
+ import sys
5
+ from blazebase import resize_pad, denormalize_detections
6
+ from visualization import draw_detections, draw_landmarks, draw_roi, FACE_CONNECTIONS
7
+ import time
8
+ import aidlite
9
+ import os
10
+
11
+ class post_mediapipe_face:
12
+ def __init__(self):
13
+ self.kp1 = 1
14
+ self.kp2 = 0
15
+ self.theta0 = 0.
16
+ self.dscale = 1.5
17
+ self.dy = 0.
18
+ self.x_scale = 256.0
19
+ self.y_scale = 256.0
20
+ self.h_scale = 256.0
21
+ self.w_scale = 256.0
22
+ self.num_keypoints = 6
23
+ self.num_classes = 1
24
+ self.num_anchors = 896
25
+ self.num_coords = 16
26
+ self.min_score_thresh = 0.4 #0.65
27
+ self.score_clipping_thresh = 100.0
28
+ self.min_suppression_threshold = 0.3
29
+ self.resolution = 192
30
+
31
+
32
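+ # turn a detection (box + keypoints) into a rotated ROI: center, scale expanded by dscale, and rotation angle from two reference keypoints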
+ def detection2roi(self,detection):
33
+ xc = (detection[:,1] + detection[:,3]) / 2
34
+ yc = (detection[:,0] + detection[:,2]) / 2
35
+ scale = (detection[:,3] - detection[:,1]) # assumes square boxes
36
+ yc += self.dy * scale
37
+ scale *= self.dscale
38
+ # compute box rotation
39
+ x0 = detection[:,4+2*self.kp1]
40
+ y0 = detection[:,4+2*self.kp1+1]
41
+ x1 = detection[:,4+2*self.kp2]
42
+ y1 = detection[:,4+2*self.kp2+1]
43
+ theta = torch.atan2(y0-y1, x0-x1) - self.theta0
44
+ return xc, yc, scale, theta
45
+
46
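+ # decode anchor-relative regressor outputs into boxes (ymin, xmin, ymax, xmax) plus the 6 face keypoints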
+ def _decode_boxes( self,raw_boxes, anchors):
47
+ boxes = torch.zeros_like(raw_boxes)
48
+
49
+ x_center = raw_boxes[..., 0] / self.x_scale * anchors[:, 2] + anchors[:, 0]
50
+ y_center = raw_boxes[..., 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
51
+
52
+ w = raw_boxes[..., 2] / self.w_scale * anchors[:, 2]
53
+ h = raw_boxes[..., 3] / self.h_scale * anchors[:, 3]
54
+
55
+ boxes[..., 0] = y_center - h / 2. # ymin
56
+ boxes[..., 1] = x_center - w / 2. # xmin
57
+ boxes[..., 2] = y_center + h / 2. # ymax
58
+ boxes[..., 3] = x_center + w / 2. # xmax
59
+
60
+ for k in range(self.num_keypoints):
61
+ offset = 4 + k*2
62
+ keypoint_x = raw_boxes[..., offset ] / self.x_scale * anchors[:, 2] + anchors[:, 0]
63
+ keypoint_y = raw_boxes[..., offset + 1] / self.y_scale * anchors[:, 3] + anchors[:, 1]
64
+ boxes[..., offset ] = keypoint_x
65
+ boxes[..., offset + 1] = keypoint_y
66
+ return boxes
67
+
68
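+ # convert raw network outputs into thresholded detections (boxes + keypoints + score), one tensor per image in the batch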
+ def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors):
69
+ assert raw_box_tensor.ndimension() == 3
70
+ assert raw_box_tensor.shape[1] == self.num_anchors
71
+ assert raw_box_tensor.shape[2] == self.num_coords
72
+
73
+ assert raw_score_tensor.ndimension() == 3
74
+ assert raw_score_tensor.shape[1] == self.num_anchors
75
+ assert raw_score_tensor.shape[2] == self.num_classes
76
+
77
+ assert raw_box_tensor.shape[0] == raw_score_tensor.shape[0]
78
+
79
+ detection_boxes = self._decode_boxes(raw_box_tensor, anchors)
80
+
81
+ thresh = self.score_clipping_thresh
82
+ raw_score_tensor = raw_score_tensor.clamp(-thresh, thresh)
83
+ detection_scores = raw_score_tensor.sigmoid().squeeze(dim=-1)
84
+
85
+ # Note: we stripped off the last dimension from the scores tensor
86
+ # because there is only one class. Now we can simply use a mask
87
+ # to filter out the boxes with too low confidence.
88
+ mask = detection_scores >= self.min_score_thresh
89
+
90
+ # Because each image from the batch can have a different number of
91
+ # detections, process them one at a time using a loop.
92
+ output_detections = []
93
+ for i in range(raw_box_tensor.shape[0]):
94
+ boxes = detection_boxes[i, mask[i]]
95
+ scores = detection_scores[i, mask[i]].unsqueeze(dim=-1)
96
+ output_detections.append(torch.cat((boxes, scores), dim=-1))
97
+
98
+ return output_detections
99
+
100
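+ # crop a rotated square ROI around each detection via an affine warp; returns the ROI images, the inverse affines, and the ROI corner points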
+ def extract_roi( self,frame, xc, yc, theta, scale):
101
+ resolution = 192
102
+ # take points on unit square and transform them according to the roi
103
+ points = torch.tensor([[-1, -1, 1, 1],
104
+ [-1, 1, -1, 1]], device=scale.device).view(1,2,4)
105
+ points = points * scale.view(-1,1,1)/2
106
+ theta = theta.view(-1, 1, 1)
107
+ R = torch.cat((
108
+ torch.cat((torch.cos(theta), -torch.sin(theta)), 2),
109
+ torch.cat((torch.sin(theta), torch.cos(theta)), 2),
110
+ ), 1)
111
+ center = torch.cat((xc.view(-1,1,1), yc.view(-1,1,1)), 1)
112
+ points = R @ points + center
113
+
114
+ # use the points to compute the affine transform that maps
115
+ # these points back to the output square
116
+ res = resolution
117
+ points1 = np.array([[0, 0, res-1],
118
+ [0, res-1, 0]], dtype=np.float32).T
119
+ affines = []
120
+ imgs = []
121
+ for i in range(points.shape[0]):
122
+ pts = points[i, :, :3].detach().numpy().T
123
+ M = cv2.getAffineTransform(pts, points1)
124
+ img = cv2.warpAffine(frame, M, (res,res))#, borderValue=127.5)
125
+ img = torch.tensor(img, device=scale.device)
126
+ imgs.append(img)
127
+ affine = cv2.invertAffineTransform(M).astype('float32')
128
+ affine = torch.tensor(affine, device=scale.device)
129
+ affines.append(affine)
130
+ if imgs:
131
+ imgs = torch.stack(imgs).permute(0,3,1,2).float() / 255.#/ 127.5 - 1.0
132
+ affines = torch.stack(affines)
133
+ else:
134
+ imgs = torch.zeros((0, 3, res, res), device=scale.device)
135
+ affines = torch.zeros((0, 2, 3), device=scale.device)
136
+
137
+ return imgs, affines, points
138
+
139
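+ # scale landmarks by the ROI resolution and apply the inverse affine to map them back into original-image coordinates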
+ def denormalize_landmarks(self, landmarks, affines):
140
+ landmarks[:,:,:2] *= self.resolution
141
+ for i in range(len(landmarks)):
142
+ landmark, affine = landmarks[i], affines[i]
143
+ landmark = (affine[:,:2] @ landmark[:,:2].T + affine[:,2:]).T
144
+ landmarks[i,:,:2] = landmark
145
+ return landmarks
146
+
147
+ def intersect(self,box_a, box_b):
148
+ A = box_a.size(0)
149
+ B = box_b.size(0)
150
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
151
+ box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
152
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
153
+ box_b[:, :2].unsqueeze(0).expand(A, B, 2))
154
+ inter = torch.clamp((max_xy - min_xy), min=0)
155
+ return inter[:, :, 0] * inter[:, :, 1]
156
+
157
+ def jaccard(self,box_a, box_b):
158
+ inter = self.intersect(box_a, box_b)
159
+ area_a = ((box_a[:, 2]-box_a[:, 0]) *
160
+ (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
161
+ area_b = ((box_b[:, 2]-box_b[:, 0]) *
162
+ (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
163
+ union = area_a + area_b - inter
164
+ return inter / union # [A,B]
165
+
166
+
167
+ def overlap_similarity(self,box, other_boxes):
168
+ """Computes the IOU between a bounding box and set of other boxes."""
169
+ return self.jaccard(box.unsqueeze(0), other_boxes).squeeze(0)
170
+
171
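+ # weighted non-maximum suppression as used by MediaPipe: overlapping detections are blended (score-weighted average) rather than dropped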
+ def _weighted_non_max_suppression(self,detections):
172
+ if len(detections) == 0: return []
173
+ output_detections = []
174
+
175
+ # Sort the detections from highest to lowest score.
176
+ remaining = torch.argsort(detections[:, self.num_coords], descending=True)
177
+
178
+ while len(remaining) > 0:
179
+ detection = detections[remaining[0]]
180
+
181
+ # Compute the overlap between the first box and the other
182
+ # remaining boxes. (Note that the other_boxes also include
183
+ # the first_box.)
184
+ first_box = detection[:4]
185
+ other_boxes = detections[remaining, :4]
186
+ ious = self.overlap_similarity(first_box, other_boxes)
187
+
188
+ # If two detections don't overlap enough, they are considered
189
+ # to be from different faces.
190
+ mask = ious > self.min_suppression_threshold
191
+ overlapping = remaining[mask]
192
+ remaining = remaining[~mask]
193
+
194
+ # Take an average of the coordinates from the overlapping
195
+ # detections, weighted by their confidence scores.
196
+ weighted_detection = detection.clone()
197
+ if len(overlapping) > 1:
198
+ coordinates = detections[overlapping, :self.num_coords]
199
+ scores = detections[overlapping, self.num_coords:self.num_coords+1]
200
+ total_score = scores.sum()
201
+ weighted = (coordinates * scores).sum(dim=0) / total_score
202
+ weighted_detection[:self.num_coords] = weighted
203
+ weighted_detection[self.num_coords] = total_score / len(overlapping)
204
+
205
+ output_detections.append(weighted_detection)
206
+
207
+ return output_detections
208
+
209
+ def draw_detections(self, img, detections, with_keypoints=True):
210
+ if isinstance(detections, torch.Tensor):
211
+ detections = detections.detach().numpy()
212
+
213
+ if detections.ndim == 1:
214
+ detections = np.expand_dims(detections, axis=0)
215
+
216
+ n_keypoints = detections.shape[1] // 2 - 2
217
+
218
+ for i in range(detections.shape[0]):
219
+ ymin = detections[i, 0]
220
+ xmin = detections[i, 1]
221
+ ymax = detections[i, 2]
222
+ xmax = detections[i, 3]
223
+
224
+ start_point = (int(xmin), int(ymin))
225
+ end_point = (int(xmax), int(ymax))
226
+ img = cv2.rectangle(img, start_point, end_point, (255, 0, 0), 1)
227
+
228
+ if with_keypoints:
229
+ for k in range(n_keypoints):
230
+ kp_x = int(detections[i, 4 + k*2 ])
231
+ kp_y = int(detections[i, 4 + k*2 + 1])
232
+ cv2.circle(img, (kp_x, kp_y), 2, (0, 0, 255), thickness=2)
233
+ return img
234
+
235
+
236
+
237
+ post_process=post_mediapipe_face()
238
+
239
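+ # wraps the BlazeFace detector QNN context binary and runs it on the DSP (TYPE_DSP) through the AidLite interpreter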
+ class faceDetectionQnn:
240
+ def __init__(self):
241
+ super().__init__()
242
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_faceDetector_w8a8.qnn216.ctx.bin"))
243
+ if self.model is None:
244
+ print("Create model failed !")
245
+ return
246
+
247
+ self.config = aidlite.Config.create_instance()
248
+ if self.config is None:
249
+ print("build_interpretper_from_model_and_config failed !")
250
+ return
251
+
252
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
253
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
254
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
255
+ self.config.is_quantify_model = 1
256
+
257
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
258
+ if self.interpreter is None:
259
+ print("build_interpretper_from_model_and_config failed !")
260
+ return
261
+ input_shapes = [[1,3, 256, 256]]
262
+ output_shapes = [[1, 896,16],[1,896,1]]
263
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
264
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
265
+
266
+ if self.interpreter is None:
267
+ print("build_interpretper_from_model_and_config failed !")
268
+ result = self.interpreter.init()
269
+ if result != 0:
270
+ print(f"interpreter init failed !")
271
+ result = self.interpreter.load_model()
272
+ if result != 0:
273
+ print("interpreter load model failed !")
274
+
275
+ print(" model load success!")
276
+
277
+ def __call__(self, input):
278
+ self.interpreter.set_input_tensor(0,input)
279
+ invoke_time=[]
280
+ invoke_nums =10
281
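+ # run the model invoke_nums times to gather timing statistics; the outputs are read once after the loop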
+ for i in range(invoke_nums):
282
+ result = self.interpreter.set_input_tensor(0, input.data)
283
+ if result != 0:
284
+ print("interpreter set_input_tensor() failed")
285
+ t1=time.time()
286
+ result = self.interpreter.invoke()
287
+ cost_time = (time.time()-t1)*1000
288
+ invoke_time.append(cost_time)
289
+
290
+ max_invoke_time = max(invoke_time)
291
+ min_invoke_time = min(invoke_time)
292
+ mean_invoke_time = sum(invoke_time)/invoke_nums
293
+ var_invoketime=np.var(invoke_time)
294
+ print("====================================")
295
+ print(f"QNN Detection invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
296
+ print("====================================")
297
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1, 896,16).copy()
298
+ features_1 = self.interpreter.get_output_tensor(1).reshape(1, 896,1).copy()
299
+ return features_0,features_1
300
+
301
+
302
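+ # wraps the face landmark QNN context binary: 1x3x192x192 ROI input, outputs a confidence flag and 1x468x3 landmarks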
+ class faceLandmarkQnn:
303
+ def __init__(self):
304
+ super().__init__()
305
+ self.model = aidlite.Model.create_instance(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../models/m_faceLandmark_w8a8.qnn216.ctx.bin"))
306
+ if self.model is None:
307
+ print("Create model failed !")
308
+ return
309
+
310
+ self.config = aidlite.Config.create_instance()
311
+ if self.config is None:
312
+ print("build_interpretper_from_model_and_config failed !")
313
+ return
314
+
315
+ self.config.implement_type = aidlite.ImplementType.TYPE_LOCAL
316
+ self.config.framework_type = aidlite.FrameworkType.TYPE_QNN
317
+ self.config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
318
+ self.config.is_quantify_model = 1
319
+
320
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(self.model, self.config)
321
+ if self.interpreter is None:
322
+ print("build_interpretper_from_model_and_config failed !")
323
+ return
324
+ input_shapes = [[1, 3, 192, 192]]
325
+ output_shapes = [[1],[1,468,3]]
326
+ self.model.set_model_properties(input_shapes, aidlite.DataType.TYPE_FLOAT32,
327
+ output_shapes, aidlite.DataType.TYPE_FLOAT32)
328
+
329
+ if self.interpreter is None:
330
+ print("build_interpretper_from_model_and_config failed !")
331
+ result = self.interpreter.init()
332
+ if result != 0:
333
+ print(f"interpreter init failed !")
334
+ result = self.interpreter.load_model()
335
+ if result != 0:
336
+ print("interpreter load model failed !")
337
+
338
+ print(" model load success!")
339
+
340
+ def __call__(self, input):
341
+ self.interpreter.set_input_tensor(0,input)
342
+ invoke_time=[]
343
+ invoke_nums =10
344
+ for i in range(invoke_nums):
345
+ result = self.interpreter.set_input_tensor(0, input.data)
346
+ if result != 0:
347
+ print("interpreter set_input_tensor() failed")
348
+ t1=time.time()
349
+ result = self.interpreter.invoke()
350
+ cost_time = (time.time()-t1)*1000
351
+ invoke_time.append(cost_time)
352
+
353
+ max_invoke_time = max(invoke_time)
354
+ min_invoke_time = min(invoke_time)
355
+ mean_invoke_time = sum(invoke_time)/invoke_nums
356
+ var_invoketime=np.var(invoke_time)
357
+ print("====================================")
358
+ print(f"QNN LandMark invoke time:\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
359
+ print("====================================")
360
+ features_0 = self.interpreter.get_output_tensor(0).reshape(1).copy()
361
+ features_1 = self.interpreter.get_output_tensor(1).reshape(1,468,3).copy()
362
+ return features_0,features_1
363
+
364
+
365
+
366
+ anchors = torch.tensor(np.load(os.path.join(os.path.dirname(os.path.abspath(__file__)),"anchors_face_back.npy")), dtype=torch.float32, device='cpu')
367
+ # anchors_np = anchors.cpu().numpy().astype(np.float32)
368
+ # np.save("anchors_float32.npy", anchors_np)
369
+
370
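+ # pipeline: letterbox the frame to 256x256 -> detector -> decode boxes + weighted NMS -> extract rotated 192x192 ROIs -> landmark model -> map landmarks back to the original image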
+ face_detc = faceDetectionQnn()
371
+ face_rec = faceLandmarkQnn()
372
+
373
+ image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"coco.jpg")
374
+
375
+ frame_ct=0
376
+ image = cv2.imread(image_path)
377
+
378
+ frame = np.ascontiguousarray(image[:,:,::-1])
379
+
380
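+ # resize_pad letterboxes the frame for the 256x256 detector and returns the scale/pad needed to undo it later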
+ img1, img2, scale, pad = resize_pad(frame)
381
+
382
+ input = (img1 / 255).astype(np.float32)
383
+ input = np.transpose(input, (2, 0, 1))
384
+ input = input[np.newaxis, ...]
385
+ t0 = time.time()
386
+ out = face_detc(input)
387
+ use_time = round((time.time() - t0) * 1000, 2)
388
+ detections = post_process._tensors_to_detections(torch.from_numpy(out[0]), torch.from_numpy(out[1]), anchors)
389
+
390
+ filtered_detections = []
391
+ num_coords = 16
392
+ for i in range(len(detections)):
393
+ faces = post_process._weighted_non_max_suppression(detections[i])
394
+ faces = torch.stack(faces) if len(faces) > 0 else torch.zeros((0, num_coords+1))
395
+ filtered_detections.append(faces)
396
+
397
+ face_detections = denormalize_detections(filtered_detections[0], scale, pad)
398
+
399
+ xc, yc, scale, theta = post_process.detection2roi(face_detections)
400
+
401
+ img, affine, box = post_process.extract_roi(frame, xc, yc, theta, scale)
402
+ if box.size()[0]!=0:
403
+ t2 = time.time()
404
+ flags, normalized_landmarks = face_rec(img.numpy())
405
+
406
+ use_time = round((time.time() - t2) * 1000, 2)
407
+
408
+ landmarks = post_process.denormalize_landmarks(torch.from_numpy(normalized_landmarks), affine)
409
+
410
+ for i in range(len(flags)):
411
+ landmark, flag = landmarks[i], flags[i]
412
+ if flag>.4: # 0.5
413
+ draw_landmarks(frame, landmark[:,:2], FACE_CONNECTIONS, size=1)
414
+ else:
415
+ print("not detect face !")
416
+
417
+ draw_roi(frame, box)
418
+ draw_detections(frame, face_detections)
419
+ cv2.imwrite(os.path.join(os.path.dirname(os.path.abspath(__file__)),'%04d.jpg'%frame_ct), frame[:,:,::-1])
420
+ face_detc.interpreter.destory()
421
+ face_rec.interpreter.destory()
422
+
423
+
424
+
model_farm_mediapipefacedetection_qcs6490_qnn2.16_int8_aidlite/python/visualization.py ADDED
@@ -0,0 +1,125 @@
1
+ import numpy as np
2
+ import cv2
3
+ import torch
4
+
5
+ def draw_detections(img, detections, with_keypoints=True):
6
+ if isinstance(detections, torch.Tensor):
7
+ detections = detections.cpu().numpy()
8
+
9
+ if detections.ndim == 1:
10
+ detections = np.expand_dims(detections, axis=0)
11
+
12
+ n_keypoints = detections.shape[1] // 2 - 2
13
+
14
+ for i in range(detections.shape[0]):
15
+ ymin = detections[i, 0]
16
+ xmin = detections[i, 1]
17
+ ymax = detections[i, 2]
18
+ xmax = detections[i, 3]
19
+
20
+ start_point = (int(xmin), int(ymin))
21
+ end_point = (int(xmax), int(ymax))
22
+ img = cv2.rectangle(img, start_point, end_point, (255, 0, 0), 1)
23
+
24
+ if with_keypoints:
25
+ for k in range(n_keypoints):
26
+ kp_x = int(detections[i, 4 + k*2 ])
27
+ kp_y = int(detections[i, 4 + k*2 + 1])
28
+ cv2.circle(img, (kp_x, kp_y), 2, (0, 0, 255), thickness=2)
29
+ return img
30
+
31
+
32
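+ # draw the four corners of each rotated ROI from extract_roi; one edge is drawn in green to show the ROI orientation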
+ def draw_roi(img, roi):
33
+ for i in range(roi.shape[0]):
34
+ (x1,x2,x3,x4), (y1,y2,y3,y4) = roi[i]
35
+ cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,0), 2)
36
+ cv2.line(img, (int(x1), int(y1)), (int(x3), int(y3)), (0,255,0), 2)
37
+ cv2.line(img, (int(x2), int(y2)), (int(x4), int(y4)), (0,0,0), 2)
38
+ cv2.line(img, (int(x3), int(y3)), (int(x4), int(y4)), (0,0,0), 2)
39
+
40
+
41
+ def draw_landmarks(img, points, connections=[], color=(0, 255, 0), size=2):
42
+ points = points[:,:2]
43
+ for point in points:
44
+ x, y = point
45
+ x, y = int(x), int(y)
46
+ cv2.circle(img, (x, y), size, color, thickness=size)
47
+ for connection in connections:
48
+ x0, y0 = points[connection[0]]
49
+ x1, y1 = points[connection[1]]
50
+ x0, y0 = int(x0), int(y0)
51
+ x1, y1 = int(x1), int(y1)
52
+ cv2.line(img, (x0, y0), (x1, y1), (0,0,0), size)
53
+
54
+
55
+
56
+ # https://github.com/metalwhale/hand_tracking/blob/b2a650d61b4ab917a2367a05b85765b81c0564f2/run.py
57
+ # 8 12 16 20
58
+ # | | | |
59
+ # 7 11 15 19
60
+ # 4 | | | |
61
+ # | 6 10 14 18
62
+ # 3 | | | |
63
+ # | 5---9---13--17
64
+ # 2 \ /
65
+ # \ \ /
66
+ # 1 \ /
67
+ # \ \ /
68
+ # ------0-
69
+ HAND_CONNECTIONS = [
70
+ (0, 1), (1, 2), (2, 3), (3, 4),
71
+ (5, 6), (6, 7), (7, 8),
72
+ (9, 10), (10, 11), (11, 12),
73
+ (13, 14), (14, 15), (15, 16),
74
+ (17, 18), (18, 19), (19, 20),
75
+ (0, 5), (5, 9), (9, 13), (13, 17), (0, 17)
76
+ ]
77
+
78
+ POSE_CONNECTIONS = [
79
+ (0,1), (1,2), (2,3), (3,7),
80
+ (0,4), (4,5), (5,6), (6,8),
81
+ (9,10),
82
+ (11,13), (13,15), (15,17), (17,19), (19,15), (15,21),
83
+ (12,14), (14,16), (16,18), (18,20), (20,16), (16,22),
84
+ (11,12), (12,24), (24,23), (23,11)
85
+ ]
86
+
87
+ # Vertex indices can be found in
88
+ # github.com/google/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualisation.png
89
+ # Found in github.com/google/mediapipe/python/solutions/face_mesh.py
90
+ FACE_CONNECTIONS = [
91
+ # Lips.
92
+ (61, 146), (146, 91), (91, 181), (181, 84), (84, 17),
93
+ (17, 314), (314, 405), (405, 321), (321, 375), (375, 291),
94
+ (61, 185), (185, 40), (40, 39), (39, 37), (37, 0),
95
+ (0, 267), (267, 269), (269, 270), (270, 409), (409, 291),
96
+ (78, 95), (95, 88), (88, 178), (178, 87), (87, 14),
97
+ (14, 317), (317, 402), (402, 318), (318, 324), (324, 308),
98
+ (78, 191), (191, 80), (80, 81), (81, 82), (82, 13),
99
+ (13, 312), (312, 311), (311, 310), (310, 415), (415, 308),
100
+ # Left eye.
101
+ (263, 249), (249, 390), (390, 373), (373, 374), (374, 380),
102
+ (380, 381), (381, 382), (382, 362), (263, 466), (466, 388),
103
+ (388, 387), (387, 386), (386, 385), (385, 384), (384, 398),
104
+ (398, 362),
105
+ # Left eyebrow.
106
+ (276, 283), (283, 282), (282, 295), (295, 285), (300, 293),
107
+ (293, 334), (334, 296), (296, 336),
108
+ # Right eye.
109
+ (33, 7), (7, 163), (163, 144), (144, 145), (145, 153),
110
+ (153, 154), (154, 155), (155, 133), (33, 246), (246, 161),
111
+ (161, 160), (160, 159), (159, 158), (158, 157), (157, 173),
112
+ (173, 133),
113
+ # Right eyebrow.
114
+ (46, 53), (53, 52), (52, 65), (65, 55), (70, 63), (63, 105),
115
+ (105, 66), (66, 107),
116
+ # Face oval.
117
+ (10, 338), (338, 297), (297, 332), (332, 284), (284, 251),
118
+ (251, 389), (389, 356), (356, 454), (454, 323), (323, 361),
119
+ (361, 288), (288, 397), (397, 365), (365, 379), (379, 378),
120
+ (378, 400), (400, 377), (377, 152), (152, 148), (148, 176),
121
+ (176, 149), (149, 150), (150, 136), (136, 172), (172, 58),
122
+ (58, 132), (132, 93), (93, 234), (234, 127), (127, 162),
123
+ (162, 21), (21, 54), (54, 103), (103, 67), (67, 109),
124
+ (109, 10)
125
+ ]
model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/README.md ADDED
@@ -0,0 +1,63 @@
1
+ ## Model Information
2
+ ### Source model
3
+ - Input shape: [1x3x256x256], [1x3x192x192]
4
+ - Number of parameters: 0.13M, 0.6M
5
+ - Model size: 0.58MB, 2.32MB
6
+ - Output shape: [1x896x16, 1x896x1], [1, 1x468x3]
7
+
8
+ Source model repository: [MediaPipe-Face-Detection](https://github.com/zmurez/MediaPipePyTorch/)
9
+
10
+ ### Converted model
11
+
12
+ - Precision: FP16
13
+ - Backend: QNN2.16
14
+ - Target Device: SNM972 QCS8550
15
+
16
+ ## Inference with AidLite SDK
17
+
18
+ ### SDK installation
19
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
20
+
21
+ - install AidLite SDK
22
+
23
+ ```bash
24
+ # Install the appropriate version of the aidlite sdk
25
+ sudo aid-pkg update
26
+ sudo aid-pkg install aidlite-sdk
27
+ # Download the QNN version that matches the backend above, e.g. to install the QNN2.23 AidLite package: sudo aid-pkg install aidlite-qnn223
28
+ sudo aid-pkg install aidlite-{QNN VERSION}
29
+ ```
30
+
31
+ - Verify AidLite SDK
32
+
33
+ ```bash
34
+ # aidlite sdk c++ check
35
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
36
+
37
+ # aidlite sdk python check
38
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
39
+ ```
40
+
41
+ ### Run demo
42
+ #### python
43
+ ```bash
44
+ cd python
45
+ python3 demo_qnn.py
46
+ ```
47
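+ 
+ The demo chains the face detector and the landmark model end to end. For orientation, the sketch below shows only the AidLite boilerplate for the detector stage, mirroring `demo_qnn.py`; the context-binary filename is a placeholder for the FP16 model under `models/`, the shapes follow the table above, and config flags (e.g. `is_quantify_model`) may need adjusting for the FP16 build.
+ 
+ ```python
+ import numpy as np
+ import aidlite
+ 
+ # placeholder path: substitute the detector context binary shipped in ../models/
+ model = aidlite.Model.create_instance("../models/<face_detector_context>.qnn216.ctx.bin")
+ model.set_model_properties([[1, 3, 256, 256]], aidlite.DataType.TYPE_FLOAT32,
+                            [[1, 896, 16], [1, 896, 1]], aidlite.DataType.TYPE_FLOAT32)
+ 
+ config = aidlite.Config.create_instance()
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
+ 
+ interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
+ interpreter.init()
+ interpreter.load_model()
+ 
+ # dummy NCHW float32 input in [0, 1]; the real demo feeds a letterboxed 256x256 RGB frame
+ frame = np.zeros((1, 3, 256, 256), dtype=np.float32)
+ interpreter.set_input_tensor(0, frame.data)
+ interpreter.invoke()
+ raw_boxes = interpreter.get_output_tensor(0).reshape(1, 896, 16)   # box/keypoint regressors
+ raw_scores = interpreter.get_output_tensor(1).reshape(1, 896, 1)   # classifier logits
+ interpreter.destory()
+ ```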
+
48
+ #### c++
49
+ ```bash
50
+ # The cnpy library is required to load .npy files (run the following from the default terminal directory)
51
+ git clone https://github.com/rogersce/cnpy.git
52
+ cd cnpy
53
+ mkdir build && cd build
54
+ cmake ..
55
+ make
56
+ sudo make install
57
+
58
+ cd mediapipe-face-detection/model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/cpp
59
+ mkdir build && cd build
60
+ cmake ..
61
+ make
62
+ ./run_test
63
+ ```
model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/__pycache__/blazebase.cpython-38.pyc ADDED
Binary file (16.5 kB). View file
 
model_farm_mediapipefacedetection_qcs8550_qnn2.16_fp16_aidlite/__pycache__/blazebase.cpython-39.pyc ADDED
Binary file (16.5 kB). View file