DaniAffCH committed
Commit adfd76b · 1 Parent(s): 8a1a70c

C++ Demo - Facial Expression Recognition (#233)


* C++ demo for facial expression recognition

* minor PR fix

* add empty line

* specified CXX version in the CMakeLists

models/facial_expression_recognition/CMakeLists.txt ADDED
@@ -0,0 +1,30 @@
+ cmake_minimum_required(VERSION 3.24)
+ set(CMAKE_CXX_STANDARD 11)
+ set(project_name "opencv_zoo_face_expression_recognition")
+
+ project(${project_name})
+
+ set(OPENCV_VERSION "4.9.0")
+ set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
+ # Find OpenCV. You may need to set the OpenCV_DIR variable to the absolute
+ # path to the directory containing OpenCVConfig.cmake, via the command line
+ # or the GUI.
+ find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
+
+ file(GLOB SourceFile
+ "demo.cpp")
+ # If the package has been found, several variables will
+ # be set; you can find the full list with descriptions
+ # in the OpenCVConfig.cmake file.
+ # Print a message showing some of them.
+ message(STATUS "OpenCV library status:")
+ message(STATUS " config: ${OpenCV_DIR}")
+ message(STATUS " version: ${OpenCV_VERSION}")
+ message(STATUS " libraries: ${OpenCV_LIBS}")
+ message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
+
+ # Declare the executable target built from your sources
+ add_executable(${project_name} ${SourceFile})
+
+ # Link your application with OpenCV libraries
+ target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})
models/facial_expression_recognition/README.md CHANGED
@@ -19,12 +19,30 @@ Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html).
 
  ***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as face detector, which supports 5-landmark detection for now (2021sep).
 
+ ### Python
  Run the following command to try the demo:
  ```shell
  # recognize the facial expression on images
  python demo.py --input /path/to/image -v
  ```
 
+ ### C++
+
+ Install the latest OpenCV and CMake >= 3.24.0, then build and run the demo with:
+
+ ```shell
+ # A typical default installation path of OpenCV is /usr/local
+ cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+ cmake --build build
+
+ # recognize facial expressions on camera input
+ ./build/opencv_zoo_face_expression_recognition
+ # recognize facial expressions on an image
+ ./build/opencv_zoo_face_expression_recognition -i=/path/to/image
+ # get help messages
+ ./build/opencv_zoo_face_expression_recognition -h
+ ```
+
  ### Example outputs
 
  Note: Zoom in to see the recognized facial expression in the top-left corner of each face box.
models/facial_expression_recognition/demo.cpp ADDED
@@ -0,0 +1,304 @@
+ #include "opencv2/opencv.hpp"
+
+ #include <utility> // std::pair
+ #include <vector>
+ #include <string>
+ #include <iostream>
+
+ using namespace std;
+ using namespace cv;
+ using namespace dnn;
+
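+ // Supported (backend, target) pairs; the index into this table is chosen
+ // with the --backend_target flag (see `keys` below).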
+ std::vector<std::pair<int, int>> backend_target_pairs = {
+     {DNN_BACKEND_OPENCV, DNN_TARGET_CPU},
+     {DNN_BACKEND_CUDA, DNN_TARGET_CUDA},
+     {DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16},
+     {DNN_BACKEND_TIMVX, DNN_TARGET_NPU},
+     {DNN_BACKEND_CANN, DNN_TARGET_NPU}
+ };
+
+ class FER
+ {
+ private:
+     Net model;
+     string modelPath;
+     // Canonical landmark positions (right eye, left eye, nose tip,
+     // right mouth corner, left mouth corner) in the 112x112 face patch
+     float stdFacePoints[5][2] = {
+         {38.2946f, 51.6963f},
+         {73.5318f, 51.5014f},
+         {56.0252f, 71.7366f},
+         {41.5493f, 92.3655f},
+         {70.7299f, 92.2041f}
+     };
+     vector<String> expressionEnum = {
+         "angry", "disgust", "fearful",
+         "happy", "neutral", "sad", "surprised"
+     };
+     Mat stdPoints = Mat(5, 2, CV_32F, this->stdFacePoints);
+     Size patchSize = Size(112, 112);
+     Scalar imageMean = Scalar(0.5, 0.5, 0.5);
+     Scalar imageStd = Scalar(0.5, 0.5, 0.5);
+
+     const String inputNames = "data";
+     const String outputNames = "label";
+
+     int backend_id;
+     int target_id;
+
+ public:
+     FER(const string& modelPath,
+         int backend_id = 0,
+         int target_id = 0)
+         : modelPath(modelPath), backend_id(backend_id), target_id(target_id)
+     {
+         this->model = readNet(modelPath);
+         this->model.setPreferableBackend(backend_id);
+         this->model.setPreferableTarget(target_id);
+     }
+
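+     // Align the face crop: estimate an affine transform that maps the five
+     // detected landmarks onto the canonical template above, warp to the
+     // 112x112 patch, then scale pixel values from [0, 255] to [-1, 1].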
+     Mat preprocess(const Mat& image, const Mat& points)
+     {
+         // image alignment
+         Mat transformation = estimateAffine2D(points, this->stdPoints);
+         Mat aligned;
+         warpAffine(image, aligned, transformation, this->patchSize);
+
+         // image normalization
+         aligned.convertTo(aligned, CV_32F, 1.0 / 255.0);
+         aligned -= imageMean;
+         aligned /= imageStd;
+
+         return blobFromImage(aligned);
+     }
+
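+     // A YuNet face row holds 15 floats: x, y, w, h, five (x, y) landmark
+     // pairs (columns 4..13) and a confidence score (column 14).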
+     String infer(const Mat& image, const Mat& facePoints)
+     {
+         Mat points = facePoints(Rect(4, 0, facePoints.cols - 5, facePoints.rows)).reshape(2, 5);
+         Mat inputBlob = preprocess(image, points);
+
+         this->model.setInput(inputBlob, this->inputNames);
+         Mat outputBlob = this->model.forward(this->outputNames);
+
+         // The index of the highest score is the predicted expression class
+         Point maxLoc;
+         minMaxLoc(outputBlob, nullptr, nullptr, nullptr, &maxLoc);
+
+         return getDesc(maxLoc.x);
+     }
+
+     String getDesc(int ind)
+     {
+         if (ind >= 0 && ind < static_cast<int>(this->expressionEnum.size()))
+         {
+             return this->expressionEnum[ind];
+         }
+         else
+         {
+             cerr << "Error: Index out of bounds." << endl;
+             return "";
+         }
+     }
+ };
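+ // Example usage (illustrative sketch; `bgrImage` and `faceRow` stand for a
+ // frame from VideoCapture and one row of YuNet output, as in main() below):
+ //     FER fer("facial_expression_recognition_mobilefacenet_2022july.onnx");
+ //     String label = fer.infer(bgrImage, faceRow);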
+
+ class YuNet
+ {
+ public:
+     YuNet(const string& model_path,
+           const Size& input_size = Size(320, 320),
+           float conf_threshold = 0.6f,
+           float nms_threshold = 0.3f,
+           int top_k = 5000,
+           int backend_id = 0,
+           int target_id = 0)
+         : model_path_(model_path), input_size_(input_size),
+           conf_threshold_(conf_threshold), nms_threshold_(nms_threshold),
+           top_k_(top_k), backend_id_(backend_id), target_id_(target_id)
+     {
+         model = FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
+     }
+
+     void setBackendAndTarget(int backend_id, int target_id)
+     {
+         backend_id_ = backend_id;
+         target_id_ = target_id;
+         model = FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
+     }
+
+     // Overwrite the input size when creating the model. Size format: [Width, Height].
+     void setInputSize(const Size& input_size)
+     {
+         input_size_ = input_size;
+         model->setInputSize(input_size_);
+     }
+
+     Mat infer(const Mat& image)
+     {
+         Mat res;
+         model->detect(image, res);
+         return res;
+     }
+
+ private:
+     Ptr<FaceDetectorYN> model;
+
+     string model_path_;
+     Size input_size_;
+     float conf_threshold_;
+     float nms_threshold_;
+     int top_k_;
+     int backend_id_;
+     int target_id_;
+ };
+
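+ // Draw each detected face box, its five landmarks, and the recognized
+ // expression label; optionally overlay the processing FPS.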
+ cv::Mat visualize(const cv::Mat& image, const cv::Mat& faces, const vector<String>& expressions, float fps = -1.f)
+ {
+     static cv::Scalar box_color{0, 255, 0};
+     static std::vector<cv::Scalar> landmark_color{
+         cv::Scalar(255,   0,   0), // right eye
+         cv::Scalar(  0,   0, 255), // left eye
+         cv::Scalar(  0, 255,   0), // nose tip
+         cv::Scalar(255,   0, 255), // right mouth corner
+         cv::Scalar(  0, 255, 255)  // left mouth corner
+     };
+     static cv::Scalar text_color{0, 255, 0};
+
+     auto output_image = image.clone();
+
+     if (fps >= 0)
+     {
+         cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2);
+     }
+
+     for (int i = 0; i < faces.rows; ++i)
+     {
+         // Draw bounding box
+         int x1 = static_cast<int>(faces.at<float>(i, 0));
+         int y1 = static_cast<int>(faces.at<float>(i, 1));
+         int w = static_cast<int>(faces.at<float>(i, 2));
+         int h = static_cast<int>(faces.at<float>(i, 3));
+         cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
+
+         // Expression as text
+         String exp = expressions[i];
+         cv::putText(output_image, exp, cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_DUPLEX, 0.5, text_color);
+
+         // Draw landmarks
+         for (int j = 0; j < static_cast<int>(landmark_color.size()); ++j)
+         {
+             int x = static_cast<int>(faces.at<float>(i, 2 * j + 4)), y = static_cast<int>(faces.at<float>(i, 2 * j + 5));
+             cv::circle(output_image, cv::Point(x, y), 2, landmark_color[j], 2);
+         }
+     }
+     return output_image;
+ }
+
+ string keys =
+ "{ help h | | Print help message. }"
+ "{ model m | facial_expression_recognition_mobilefacenet_2022july.onnx | Path to the facial expression recognition model. }"
+ "{ yunet_model ym | ../face_detection_yunet/face_detection_yunet_2023mar.onnx | Path to the face detection YuNet model. }"
+ "{ input i | | Path to the input image or video file. Skip this argument to capture frames from a camera. }"
+ "{ backend_target t | 0 | Choose one of the backend-target pairs to run this demo:\n"
+ "0: (default) OpenCV implementation + CPU,\n"
+ "1: CUDA + GPU (CUDA),\n"
+ "2: CUDA + GPU (CUDA FP16),\n"
+ "3: TIM-VX + NPU,\n"
+ "4: CANN + NPU}"
+ "{ save s | false | Specify to save results. }"
+ "{ vis v | true | Specify to open a window for result visualization. }"
+ ;
+
+ int main(int argc, char** argv)
+ {
+     CommandLineParser parser(argc, argv, keys);
+
+     parser.about("Facial Expression Recognition");
+     if (parser.has("help"))
+     {
+         parser.printMessage();
+         return 0;
+     }
+
+     string modelPath = parser.get<string>("model");
+     string yunetModelPath = parser.get<string>("yunet_model");
+     string inputPath = parser.get<string>("input");
+     int backendTarget = parser.get<int>("backend_target");
+     bool saveFlag = parser.get<bool>("save");
+     bool visFlag = parser.get<bool>("vis");
+
+     if (modelPath.empty())
+         CV_Error(Error::StsError, "Model file " + modelPath + " not found");
+
+     if (yunetModelPath.empty())
+         CV_Error(Error::StsError, "Face Detection Model file " + yunetModelPath + " not found");
+
+     if (backendTarget < 0 || backendTarget >= static_cast<int>(backend_target_pairs.size()))
+         CV_Error(Error::StsError, "Invalid backend-target pair index");
+
+     YuNet faceDetectionModel(yunetModelPath);
+     FER expressionRecognitionModel(modelPath, backend_target_pairs[backendTarget].first, backend_target_pairs[backendTarget].second);
+
+     VideoCapture cap;
+     if (!inputPath.empty())
+         cap.open(samples::findFile(inputPath));
+     else
+         cap.open(0);
+
+     if (!cap.isOpened())
+         CV_Error(Error::StsError, "Cannot open video or file");
+
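+     // Grab and process frames until the stream ends or a key is pressed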
+     Mat frame;
+     static const std::string kWinName = "Facial Expression Demo";
+
+     while (waitKey(1) < 0)
+     {
+         cap >> frame;
+
+         if (frame.empty())
+         {
+             if (inputPath.empty())
+                 cout << "Frame is empty" << endl;
+             break;
+         }
+
+         faceDetectionModel.setInputSize(frame.size());
+
+         Mat faces = faceDetectionModel.infer(frame);
+         vector<String> expressions;
+
+         for (int i = 0; i < faces.rows; ++i)
+         {
+             Mat face = faces.row(i);
+             String exp = expressionRecognitionModel.infer(frame, face);
+             expressions.push_back(exp);
+
+             int x1 = static_cast<int>(faces.at<float>(i, 0));
+             int y1 = static_cast<int>(faces.at<float>(i, 1));
+             int w = static_cast<int>(faces.at<float>(i, 2));
+             int h = static_cast<int>(faces.at<float>(i, 3));
+             float conf = faces.at<float>(i, 14);
+
+             std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f expression=%s\n", i, x1, y1, w, h, conf, exp.c_str());
+         }
+
+         Mat res_frame = visualize(frame, faces, expressions);
+
+         if (visFlag || inputPath.empty())
+         {
+             imshow(kWinName, res_frame);
+             if (!inputPath.empty())
+                 waitKey(0);
+         }
+         if (saveFlag)
+         {
+             cout << "Results are saved to result.jpg" << endl;
+             cv::imwrite("result.jpg", res_frame);
+         }
+     }
+
+     return 0;
+ }