Patrick Keane
commited on
Commit
·
92f2071
1
Parent(s):
83e9fc1
Add SFace face recognizer cpp demo (#259)
Browse files
models/face_recognition_sface/CMakeLists.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cmake_minimum_required(VERSION 3.24.0)
|
2 |
+
project(opencv_zoo_face_recognition_sface)
|
3 |
+
|
4 |
+
set(OPENCV_VERSION "4.9.0")
|
5 |
+
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
|
6 |
+
|
7 |
+
# Find OpenCV
|
8 |
+
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
|
9 |
+
|
10 |
+
add_executable(demo demo.cpp)
|
11 |
+
target_link_libraries(demo ${OpenCV_LIBS})
|
models/face_recognition_sface/README.md
CHANGED
@@ -24,6 +24,7 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval).
|
|
24 |
|
25 |
Run the following command to try the demo:
|
26 |
|
|
|
27 |
```shell
|
28 |
# recognize on images
|
29 |
python demo.py --target /path/to/image1 --query /path/to/image2
|
@@ -32,6 +33,22 @@ python demo.py --target /path/to/image1 --query /path/to/image2
|
|
32 |
python demo.py --help
|
33 |
```
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
### Example outputs
|
36 |
|
37 |

|
|
|
24 |
|
25 |
Run the following command to try the demo:
|
26 |
|
27 |
+
### Python
|
28 |
```shell
|
29 |
# recognize on images
|
30 |
python demo.py --target /path/to/image1 --query /path/to/image2
|
|
|
33 |
python demo.py --help
|
34 |
```
|
35 |
|
36 |
+
### C++
|
37 |
+
Install latest OpenCV and CMake >= 3.24.0 to get started with:
|
38 |
+
|
39 |
+
```shell
|
40 |
+
# A typical and default installation path of OpenCV is /usr/local
|
41 |
+
cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
|
42 |
+
cmake --build build
|
43 |
+
|
44 |
+
# detect on camera input
|
45 |
+
./build/demo -t=/path/to/target_face
|
46 |
+
# detect on an image
|
47 |
+
./build/demo -t=/path/to/target_face -q=/path/to/query_face -v
|
48 |
+
# get help messages
|
49 |
+
./build/demo -h
|
50 |
+
```
|
51 |
+
|
52 |
### Example outputs
|
53 |
|
54 |

|
models/face_recognition_sface/demo.cpp
ADDED
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include "opencv2/opencv.hpp"
|
2 |
+
#include "opencv2/core/types.hpp"
|
3 |
+
|
4 |
+
#include <string>
|
5 |
+
#include <vector>
|
6 |
+
|
7 |
+
const std::vector<std::pair<int, int>> backend_target_pairs = {
|
8 |
+
{cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
|
9 |
+
{cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
|
10 |
+
{cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},
|
11 |
+
{cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},
|
12 |
+
{cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}
|
13 |
+
};
|
14 |
+
|
15 |
+
class YuNet
|
16 |
+
{
|
17 |
+
public:
|
18 |
+
YuNet(const std::string& model_path,
|
19 |
+
const cv::Size& input_size,
|
20 |
+
const float conf_threshold,
|
21 |
+
const float nms_threshold,
|
22 |
+
const int top_k,
|
23 |
+
const int backend_id,
|
24 |
+
const int target_id)
|
25 |
+
{
|
26 |
+
_detector = cv::FaceDetectorYN::create(
|
27 |
+
model_path, "", input_size, conf_threshold, nms_threshold, top_k, backend_id, target_id);
|
28 |
+
}
|
29 |
+
|
30 |
+
void setInputSize(const cv::Size& input_size)
|
31 |
+
{
|
32 |
+
_detector->setInputSize(input_size);
|
33 |
+
}
|
34 |
+
|
35 |
+
void setTopK(const int top_k)
|
36 |
+
{
|
37 |
+
_detector->setTopK(top_k);
|
38 |
+
}
|
39 |
+
|
40 |
+
cv::Mat infer(const cv::Mat& image)
|
41 |
+
{
|
42 |
+
cv::Mat result;
|
43 |
+
_detector->detect(image, result);
|
44 |
+
return result;
|
45 |
+
}
|
46 |
+
|
47 |
+
private:
|
48 |
+
cv::Ptr<cv::FaceDetectorYN> _detector;
|
49 |
+
};
|
50 |
+
|
51 |
+
class SFace
|
52 |
+
{
|
53 |
+
public:
|
54 |
+
SFace(const std::string& model_path,
|
55 |
+
const int backend_id,
|
56 |
+
const int target_id,
|
57 |
+
const int distance_type)
|
58 |
+
: _distance_type(static_cast<cv::FaceRecognizerSF::DisType>(distance_type))
|
59 |
+
{
|
60 |
+
_recognizer = cv::FaceRecognizerSF::create(model_path, "", backend_id, target_id);
|
61 |
+
}
|
62 |
+
|
63 |
+
cv::Mat extractFeatures(const cv::Mat& orig_image, const cv::Mat& face_image)
|
64 |
+
{
|
65 |
+
// Align and crop detected face from original image
|
66 |
+
cv::Mat target_aligned;
|
67 |
+
_recognizer->alignCrop(orig_image, face_image, target_aligned);
|
68 |
+
// Extract features from cropped detected face
|
69 |
+
cv::Mat target_features;
|
70 |
+
_recognizer->feature(target_aligned, target_features);
|
71 |
+
return target_features.clone();
|
72 |
+
}
|
73 |
+
|
74 |
+
std::pair<double, bool> matchFeatures(const cv::Mat& target_features, const cv::Mat& query_features)
|
75 |
+
{
|
76 |
+
const double score = _recognizer->match(target_features, query_features, _distance_type);
|
77 |
+
if (_distance_type == cv::FaceRecognizerSF::DisType::FR_COSINE)
|
78 |
+
{
|
79 |
+
return {score, score >= _threshold_cosine};
|
80 |
+
}
|
81 |
+
return {score, score <= _threshold_norml2};
|
82 |
+
}
|
83 |
+
|
84 |
+
private:
|
85 |
+
cv::Ptr<cv::FaceRecognizerSF> _recognizer;
|
86 |
+
cv::FaceRecognizerSF::DisType _distance_type;
|
87 |
+
double _threshold_cosine = 0.363;
|
88 |
+
double _threshold_norml2 = 1.128;
|
89 |
+
};
|
90 |
+
|
91 |
+
cv::Mat visualize(const cv::Mat& image,
|
92 |
+
const cv::Mat& faces,
|
93 |
+
const std::vector<std::pair<double, bool>>& matches,
|
94 |
+
const float fps = -0.1F,
|
95 |
+
const cv::Size& target_size = cv::Size(512, 512))
|
96 |
+
{
|
97 |
+
static const cv::Scalar matched_box_color{0, 255, 0};
|
98 |
+
static const cv::Scalar mismatched_box_color{0, 0, 255};
|
99 |
+
|
100 |
+
if (fps >= 0)
|
101 |
+
{
|
102 |
+
cv::Mat output_image = image.clone();
|
103 |
+
|
104 |
+
const int x1 = static_cast<int>(faces.at<float>(0, 0));
|
105 |
+
const int y1 = static_cast<int>(faces.at<float>(0, 1));
|
106 |
+
const int w = static_cast<int>(faces.at<float>(0, 2));
|
107 |
+
const int h = static_cast<int>(faces.at<float>(0, 3));
|
108 |
+
const auto match = matches.at(0);
|
109 |
+
|
110 |
+
cv::Scalar box_color = match.second ? matched_box_color : mismatched_box_color;
|
111 |
+
// Draw bounding box
|
112 |
+
cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
|
113 |
+
// Draw match score
|
114 |
+
cv::putText(output_image, cv::format("%.4f", match.first), cv::Point(x1, y1+12), cv::FONT_HERSHEY_DUPLEX, 0.30, box_color);
|
115 |
+
// Draw FPS
|
116 |
+
cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2);
|
117 |
+
|
118 |
+
return output_image;
|
119 |
+
}
|
120 |
+
|
121 |
+
cv::Mat output_image = cv::Mat::zeros(target_size, CV_8UC3);
|
122 |
+
|
123 |
+
// Determine new height and width of image with aspect ratio of original image
|
124 |
+
const double ratio = std::min(static_cast<double>(target_size.height) / image.rows,
|
125 |
+
static_cast<double>(target_size.width) / image.cols);
|
126 |
+
const int new_height = static_cast<int>(image.rows * ratio);
|
127 |
+
const int new_width = static_cast<int>(image.cols * ratio);
|
128 |
+
|
129 |
+
// Resize the original image, maintaining aspect ratio
|
130 |
+
cv::Mat resize_out;
|
131 |
+
cv::resize(image, resize_out, cv::Size(new_width, new_height), cv::INTER_LINEAR);
|
132 |
+
|
133 |
+
// Determine top left corner in resized dimensions
|
134 |
+
const int top = std::max(0, target_size.height - new_height) / 2;
|
135 |
+
const int left = std::max(0, target_size.width - new_width) / 2;
|
136 |
+
|
137 |
+
// Copy resized image into target output image
|
138 |
+
const cv::Rect roi = cv::Rect(cv::Point(left, top), cv::Size(new_width, new_height));
|
139 |
+
cv::Mat out_sub_image = output_image(roi);
|
140 |
+
resize_out.copyTo(out_sub_image);
|
141 |
+
|
142 |
+
for (int i = 0; i < faces.rows; ++i)
|
143 |
+
{
|
144 |
+
const int x1 = static_cast<int>(faces.at<float>(i, 0) * ratio) + left;
|
145 |
+
const int y1 = static_cast<int>(faces.at<float>(i, 1) * ratio) + top;
|
146 |
+
const int w = static_cast<int>(faces.at<float>(i, 2) * ratio);
|
147 |
+
const int h = static_cast<int>(faces.at<float>(i, 3) * ratio);
|
148 |
+
const auto match = matches.at(i);
|
149 |
+
|
150 |
+
cv::Scalar box_color = match.second ? matched_box_color : mismatched_box_color;
|
151 |
+
// Draw bounding box
|
152 |
+
cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
|
153 |
+
// Draw match score
|
154 |
+
cv::putText(output_image, cv::format("%.4f", match.first), cv::Point(x1, y1+12), cv::FONT_HERSHEY_DUPLEX, 0.30, box_color);
|
155 |
+
}
|
156 |
+
return output_image;
|
157 |
+
}
|
158 |
+
|
159 |
+
int main(int argc, char** argv)
|
160 |
+
{
|
161 |
+
cv::CommandLineParser parser(argc, argv,
|
162 |
+
// General options
|
163 |
+
"{help h | | Print this message}"
|
164 |
+
"{backend_target b | 0 | Set DNN backend target pair:\n"
|
165 |
+
"0: (default) OpenCV implementation + CPU,\n"
|
166 |
+
"1: CUDA + GPU (CUDA),\n"
|
167 |
+
"2: CUDA + GPU (CUDA FP16),\n"
|
168 |
+
"3: TIM-VX + NPU,\n"
|
169 |
+
"4: CANN + NPU}"
|
170 |
+
"{save s | false | Whether to save result image or not}"
|
171 |
+
"{vis v | false | Whether to visualize result image or not}"
|
172 |
+
// SFace options
|
173 |
+
"{target_face t | | Set path to input image 1 (target face)}"
|
174 |
+
"{query_face q | | Set path to input image 2 (query face), omit if using camera}"
|
175 |
+
"{model m | face_recognition_sface_2021dec.onnx | Set path to the model}"
|
176 |
+
"{distance_type d | 0 | 0 = cosine, 1 = norm_l1}"
|
177 |
+
// YuNet options
|
178 |
+
"{yunet_model | ../face_detection_yunet/face_detection_yunet_2023mar.onnx | Set path to the YuNet model}"
|
179 |
+
"{detect_threshold | 0.9 | Set the minimum confidence for the model\n"
|
180 |
+
"to identify a face. Filter out faces of\n"
|
181 |
+
"conf < conf_threshold}"
|
182 |
+
"{nms_threshold | 0.3 | Set the threshold to suppress overlapped boxes.\n"
|
183 |
+
"Suppress boxes if IoU(box1, box2) >= nms_threshold\n"
|
184 |
+
", the one of higher score is kept.}"
|
185 |
+
"{top_k | 5000 | Keep top_k bounding boxes before NMS}"
|
186 |
+
);
|
187 |
+
|
188 |
+
if (parser.has("help"))
|
189 |
+
{
|
190 |
+
parser.printMessage();
|
191 |
+
return 0;
|
192 |
+
}
|
193 |
+
// General CLI options
|
194 |
+
const int backend = parser.get<int>("backend_target");
|
195 |
+
const bool save_flag = parser.get<bool>("save");
|
196 |
+
const bool vis_flag = parser.get<bool>("vis");
|
197 |
+
const int backend_id = backend_target_pairs.at(backend).first;
|
198 |
+
const int target_id = backend_target_pairs.at(backend).second;
|
199 |
+
|
200 |
+
// YuNet CLI options
|
201 |
+
const std::string detector_model_path = parser.get<std::string>("yunet_model");
|
202 |
+
const float detect_threshold = parser.get<float>("detect_threshold");
|
203 |
+
const float nms_threshold = parser.get<float>("nms_threshold");
|
204 |
+
const int top_k = parser.get<int>("top_k");
|
205 |
+
|
206 |
+
// Use YuNet as the detector backend
|
207 |
+
auto face_detector = YuNet(
|
208 |
+
detector_model_path, cv::Size(320, 320), detect_threshold, nms_threshold, top_k, backend_id, target_id);
|
209 |
+
|
210 |
+
// SFace CLI options
|
211 |
+
const std::string target_path = parser.get<std::string>("target_face");
|
212 |
+
const std::string query_path = parser.get<std::string>("query_face");
|
213 |
+
const std::string model_path = parser.get<std::string>("model");
|
214 |
+
const int distance_type = parser.get<int>("distance_type");
|
215 |
+
|
216 |
+
auto face_recognizer = SFace(model_path, backend_id, target_id, distance_type);
|
217 |
+
|
218 |
+
if (target_path.empty())
|
219 |
+
{
|
220 |
+
CV_Error(cv::Error::StsError, "Path to target image " + target_path + " not found");
|
221 |
+
}
|
222 |
+
|
223 |
+
cv::Mat target_image = cv::imread(target_path);
|
224 |
+
// Detect single face in target image
|
225 |
+
face_detector.setInputSize(target_image.size());
|
226 |
+
face_detector.setTopK(1);
|
227 |
+
cv::Mat target_face = face_detector.infer(target_image);
|
228 |
+
// Extract features from target face
|
229 |
+
cv::Mat target_features = face_recognizer.extractFeatures(target_image, target_face.row(0));
|
230 |
+
|
231 |
+
if (!query_path.empty()) // use image
|
232 |
+
{
|
233 |
+
// Detect any faces in query image
|
234 |
+
cv::Mat query_image = cv::imread(query_path);
|
235 |
+
face_detector.setInputSize(query_image.size());
|
236 |
+
face_detector.setTopK(5000);
|
237 |
+
cv::Mat query_faces = face_detector.infer(query_image);
|
238 |
+
|
239 |
+
// Store match scores for visualization
|
240 |
+
std::vector<std::pair<double, bool>> matches;
|
241 |
+
|
242 |
+
for (int i = 0; i < query_faces.rows; ++i)
|
243 |
+
{
|
244 |
+
// Extract features from query face
|
245 |
+
cv::Mat query_features = face_recognizer.extractFeatures(query_image, query_faces.row(i));
|
246 |
+
// Measure similarity of target face to query face
|
247 |
+
const auto match = face_recognizer.matchFeatures(target_features, query_features);
|
248 |
+
matches.push_back(match);
|
249 |
+
|
250 |
+
const int x1 = static_cast<int>(query_faces.at<float>(i, 0));
|
251 |
+
const int y1 = static_cast<int>(query_faces.at<float>(i, 1));
|
252 |
+
const int w = static_cast<int>(query_faces.at<float>(i, 2));
|
253 |
+
const int h = static_cast<int>(query_faces.at<float>(i, 3));
|
254 |
+
const float conf = query_faces.at<float>(i, 14);
|
255 |
+
|
256 |
+
std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f, match=%.4f\n", i, x1, y1, w, h, conf, match.first);
|
257 |
+
}
|
258 |
+
|
259 |
+
if (save_flag || vis_flag)
|
260 |
+
{
|
261 |
+
auto vis_target = visualize(target_image, target_face, {{1.0, true}});
|
262 |
+
auto vis_query = visualize(query_image, query_faces, matches);
|
263 |
+
cv::Mat output_image;
|
264 |
+
cv::hconcat(vis_target, vis_query, output_image);
|
265 |
+
|
266 |
+
if (save_flag)
|
267 |
+
{
|
268 |
+
std::cout << "Results are saved to result.jpg\n";
|
269 |
+
cv::imwrite("result.jpg", output_image);
|
270 |
+
}
|
271 |
+
if (vis_flag)
|
272 |
+
{
|
273 |
+
cv::namedWindow(query_path, cv::WINDOW_AUTOSIZE);
|
274 |
+
cv::imshow(query_path, output_image);
|
275 |
+
cv::waitKey(0);
|
276 |
+
}
|
277 |
+
}
|
278 |
+
}
|
279 |
+
else // use video capture
|
280 |
+
{
|
281 |
+
const int device_id = 0;
|
282 |
+
auto cap = cv::VideoCapture(device_id);
|
283 |
+
const int w = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
|
284 |
+
const int h = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
|
285 |
+
face_detector.setInputSize(cv::Size(w, h));
|
286 |
+
|
287 |
+
auto tick_meter = cv::TickMeter();
|
288 |
+
cv::Mat query_frame;
|
289 |
+
|
290 |
+
while (cv::waitKey(1) < 0)
|
291 |
+
{
|
292 |
+
bool has_frame = cap.read(query_frame);
|
293 |
+
if (!has_frame)
|
294 |
+
{
|
295 |
+
std::cout << "No frames grabbed! Exiting ...\n";
|
296 |
+
break;
|
297 |
+
}
|
298 |
+
tick_meter.start();
|
299 |
+
// Detect faces from webcam image
|
300 |
+
cv::Mat query_faces = face_detector.infer(query_frame);
|
301 |
+
tick_meter.stop();
|
302 |
+
|
303 |
+
// Extract features from query face
|
304 |
+
cv::Mat query_features = face_recognizer.extractFeatures(query_frame, query_faces.row(0));
|
305 |
+
// Measure similarity of target face to query face
|
306 |
+
const auto match = face_recognizer.matchFeatures(target_features, query_features);
|
307 |
+
|
308 |
+
const auto fps = static_cast<float>(tick_meter.getFPS());
|
309 |
+
|
310 |
+
auto vis_target = visualize(target_image, target_face, {{1.0, true}}, -0.1F, cv::Size(w, h));
|
311 |
+
auto vis_query = visualize(query_frame, query_faces, {match}, fps);
|
312 |
+
cv::Mat output_image;
|
313 |
+
cv::hconcat(vis_target, vis_query, output_image);
|
314 |
+
|
315 |
+
// Visualize in a new window
|
316 |
+
cv::imshow("SFace Demo", output_image);
|
317 |
+
|
318 |
+
tick_meter.reset();
|
319 |
+
}
|
320 |
+
}
|
321 |
+
return 0;
|
322 |
+
}
|