Chengrui Wang committed
Commit 596a24b · 1 Parent(s): 92ff628

Add a model for facial expression recognition (#100)
README.md CHANGED
@@ -19,6 +19,7 @@ Guidelines:
  | ------------------------------------------------------- | ----------------------------- | ---------- | -------------- | ------------ | --------------- | ------------ | ----------- |
  | [YuNet](./models/face_detection_yunet) | Face Detection | 160x120 | 1.45 | 6.22 | 12.18 | 4.04 | 86.69 |
  | [SFace](./models/face_recognition_sface) | Face Recognition | 112x112 | 8.65 | 99.20 | 24.88 | 46.25 | --- |
+ | [FER](./models/facial_expression_recognition/) | Facial Expression Recognition | 112x112 | 4.43 | 49.86 | 31.07 | 108.53\* | --- |
  | [LPD-YuNet](./models/license_plate_detection_yunet/) | License Plate Detection | 320x240 | --- | 168.03 | 56.12 | 29.53 | --- |
  | [YOLOX](./models/object_detection_yolox/) | Object Detection | 640x640 | 176.68 | 1496.70 | 388.95 | 420.98 | --- |
  | [NanoDet](./models/object_detection_nanodet/) | Object Detection | 416x416 | 157.91 | 220.36 | 64.94 | 116.64 | --- |

@@ -62,6 +63,10 @@ Some examples are listed below. You can find more in the directory of each model.

  ![largest selfie](./models/face_detection_yunet/examples/largest_selfie.jpg)

+ ### Facial Expression Recognition with [Progressive Teacher](./models/facial_expression_recognition/)
+
+ ![fer demo](./models/facial_expression_recognition/examples/selfie.jpg)
+
  ### Human Segmentation with [PP-HumanSeg](./models/human_segmentation_pphumanseg/)

  ![messi](./models/human_segmentation_pphumanseg/examples/messi.jpg)
benchmark/config/facial_expression_recognition.yaml ADDED
@@ -0,0 +1,16 @@
+ Benchmark:
+   name: "Facial Expression Recognition Benchmark"
+   type: "Recognition"
+   data:
+     path: "benchmark/data/facial_expression_recognition/fer_evaluation"
+     files: ["RAF_test_0_61.jpg", "RAF_test_0_30.jpg", "RAF_test_6_25.jpg"]
+   metric:  # 'sizes' is omitted since this model requires input of fixed size
+     warmup: 30
+     repeat: 10
+     reduction: "median"
+   backend: "default"
+   target: "cpu"
+
+ Model:
+   name: "FacialExpressionRecog"
+   modelPath: "models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx"
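
In plain terms, this config points the "Recognition" benchmark at three RAF test crops and times FacialExpressionRecog on CPU with 30 warmup runs and 10 timed runs, reporting the median. Below is a rough standalone sketch of that measurement loop (not the benchmark harness itself); paths are relative to the repository root and taken from the config above.

```python
import time

import cv2 as cv
import numpy as np

from models.facial_expression_recognition.facial_fer_model import FacialExpressionRecog

model = FacialExpressionRecog(
    modelPath='models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx')
img = cv.imread('benchmark/data/facial_expression_recognition/fer_evaluation/RAF_test_0_61.jpg')

for _ in range(30):          # warmup: 30
    model.infer(img, None)   # no bbox/landmarks: the pre-cropped image is used as-is

laps = []
for _ in range(10):          # repeat: 10
    start = time.time()
    model.infer(img, None)
    laps.append((time.time() - start) * 1000)

print('median latency: %.2f ms' % np.median(laps))  # reduction: "median"
```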
benchmark/download_data.py CHANGED
@@ -173,6 +173,10 @@ data_downloaders = dict(
      url='https://drive.google.com/u/0/uc?id=1BRIozREIzqkm_aMQ581j93oWoS-6TLST&export=download',
      sha='03892b9036c58d9400255ff73858caeec1f46609',
      filename='face_recognition.zip'),
+ facial_expression_recognition=Downloader(name='facial_expression_recognition',
+     url='https://drive.google.com/u/0/uc?id=13ZE0Pz302z1AQmBmYGuowkTiEXVLyFFZ&export=download',
+     sha='8f757559820c8eaa1b1e0065f9c3bbbd4f49efe2',
+     filename='facial_expression_recognition.zip'),
  text=Downloader(name='text',
      url='https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download',
      sha='a40cf095ceb77159ddd2a5902f3b4329696dd866',
benchmark/utils/dataloaders/recognition.py CHANGED
@@ -16,7 +16,10 @@ class RecognitionImageLoader(_BaseImageLoader):
      def _load_label(self):
          labels = dict.fromkeys(self._files, None)
          for filename in self._files:
-             labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])), ndmin=2)
+             if os.path.exists(os.path.join(self._path, '{}.txt'.format(filename[:-4]))):
+                 labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])), ndmin=2)
+             else:
+                 labels[filename] = None
          return labels

      def __iter__(self):
benchmark/utils/metrics/recognition.py CHANGED
@@ -12,12 +12,20 @@ class Recognition(BaseMetric):
          img, bboxes = args

          self._timer.reset()
-         for idx, bbox in enumerate(bboxes):
+         if bboxes is not None:
+             for idx, bbox in enumerate(bboxes):
+                 for _ in range(self._warmup):
+                     model.infer(img, bbox)
+                 for _ in range(self._repeat):
+                     self._timer.start()
+                     model.infer(img, bbox)
+                     self._timer.stop()
+         else:
              for _ in range(self._warmup):
-                 model.infer(img, bbox)
+                 model.infer(img, None)
              for _ in range(self._repeat):
                  self._timer.start()
-                 model.infer(img, bbox)
+                 model.infer(img, None)
                  self._timer.stop()

          return self._getResult()
models/__init__.py CHANGED
@@ -14,6 +14,7 @@ from .handpose_estimation_mediapipe.mp_handpose import MPHandPose
  from .license_plate_detection_yunet.lpd_yunet import LPD_YuNet
  from .object_detection_nanodet.nanodet import NanoDet
  from .object_detection_yolox.yolox import YoloX
+ from .facial_expression_recognition.facial_fer_model import FacialExpressionRecog

  class Registery:
      def __init__(self, name):
@@ -43,4 +44,4 @@ MODELS.register(MPHandPose)
  MODELS.register(LPD_YuNet)
  MODELS.register(NanoDet)
  MODELS.register(YoloX)
-
+ MODELS.register(FacialExpressionRecog)
models/facial_expression_recognition/README.md ADDED
@@ -0,0 +1,40 @@
+
+ # Progressive Teacher
+
+ Progressive Teacher: [Boosting Facial Expression Recognition by A Semi-Supervised Progressive Teacher](https://scholar.google.com/citations?view_op=view_citation&hl=zh-CN&user=OCwcfAwAAAAJ&citation_for_view=OCwcfAwAAAAJ:u5HHmVD_uO8C)
+
+ Note:
+ - Progressive Teacher is contributed by [Jing Jiang](https://scholar.google.com/citations?user=OCwcfAwAAAAJ&hl=zh-CN).
+ - [MobileFaceNet](https://link.springer.com/chapter/10.1007/978-3-319-97909-0_46) is used as the backbone, and the model can classify seven basic facial expressions (angry, disgust, fearful, happy, neutral, sad, surprised).
+ - [facial_expression_recognition_mobilefacenet_2022july.onnx](https://github.com/opencv/opencv_zoo/raw/master/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx) is implemented thanks to [Chengrui Wang](https://github.com/opencv).
+
+ Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html):
+
+ | Models              | Accuracy |
+ |---------------------|----------|
+ | Progressive Teacher | 88.27%   |
+
+
+ ## Demo
+
+ ***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as the face detector, which supports 5-landmark detection for now (2021sep).
+
+ Run the following command to try the demo:
+ ```shell
+ # recognize the facial expression on images
+ python demo.py --input /path/to/image
+ ```
+
+ ### Example outputs
+
+ Note: Zoom in to see the recognized facial expression in the top-left corner of each face box.
+
+ ![fer demo](./examples/selfie.jpg)
+
+ ## License
+
+ All files in this directory are licensed under the [Apache 2.0 License](./LICENSE).
+
+ ## Reference
+
+ - https://ieeexplore.ieee.org/abstract/document/9629313
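
For quick orientation, here is a minimal sketch of the Python API that the new demo.py (shown next) builds on. It follows the same calls as the demo; the image path is a placeholder and the relative model paths assume the script runs from models/facial_expression_recognition/.

```python
import sys

import cv2 as cv

from facial_fer_model import FacialExpressionRecog

sys.path.append('../face_detection_yunet')
from yunet import YuNet

detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
fer = FacialExpressionRecog(modelPath='./facial_expression_recognition_mobilefacenet_2022july.onnx')

image = cv.imread('/path/to/image.jpg')
h, w, _ = image.shape
detector.setInputSize([w, h])
faces = detector.infer(image)  # rows of [x, y, w, h, 5 x (lm_x, lm_y), score], or None

for face in (faces if faces is not None else []):
    label = fer.infer(image, face[:-1])[0]        # class index for this face
    print(FacialExpressionRecog.getDesc(label))   # e.g. 'happy'
```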
models/facial_expression_recognition/demo.py ADDED
@@ -0,0 +1,131 @@
+ import sys
+ import argparse
+ import copy
+ import datetime
+
+ import numpy as np
+ import cv2 as cv
+
+ from facial_fer_model import FacialExpressionRecog
+
+ sys.path.append('../face_detection_yunet')
+ from yunet import YuNet
+
+
+ def str2bool(v):
+     if v.lower() in ['on', 'yes', 'true', 'y', 't']:
+         return True
+     elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
+         return False
+     else:
+         raise NotImplementedError
+
+
+ backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
+ targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
+ help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
+ help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
+ try:
+     backends += [cv.dnn.DNN_BACKEND_TIMVX]
+     targets += [cv.dnn.DNN_TARGET_NPU]
+     help_msg_backends += "; {:d}: TIMVX"
+     help_msg_targets += "; {:d}: NPU"
+ except:
+     print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.')
+
+ parser = argparse.ArgumentParser(description='Facial Expression Recognition')
+ parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+ parser.add_argument('--model', '-fm', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx', help='Path to the facial expression recognition model.')
+ parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
+ parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
+ parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
+ parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+ args = parser.parse_args()
+
+
+ def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)):
+
+     print('%s %3d faces detected.' % (datetime.datetime.now(), len(det_res)))
+
+     output = image.copy()
+     landmark_color = [
+         (255, 0, 0),    # right eye
+         (0, 0, 255),    # left eye
+         (0, 255, 0),    # nose tip
+         (255, 0, 255),  # right mouth corner
+         (0, 255, 255)   # left mouth corner
+     ]
+
+     for ind, (det, fer_type) in enumerate(zip(det_res, fer_res)):
+         bbox = det[0:4].astype(np.int32)
+         fer_type = FacialExpressionRecog.getDesc(fer_type)
+         print("Face %2d: %d %d %d %d %s." % (ind, bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3], fer_type))
+         cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)
+         cv.putText(output, fer_type, (bbox[0], bbox[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, text_color)
+         landmarks = det[4:14].astype(np.int32).reshape((5, 2))
+         for idx, landmark in enumerate(landmarks):
+             cv.circle(output, landmark, 2, landmark_color[idx], 2)
+     return output
+
+
+ def process(detect_model, fer_model, frame):
+     h, w, _ = frame.shape
+     detect_model.setInputSize([w, h])
+     dets = detect_model.infer(frame)
+
+     if dets is None:
+         return False, None, None
+
+     fer_res = np.zeros(0, dtype=np.int8)
+     for face_points in dets:
+         fer_res = np.concatenate((fer_res, fer_model.infer(frame, face_points[:-1])), axis=0)
+     return True, dets, fer_res
+
+
+ if __name__ == '__main__':
+     detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx')
+
+     fer_model = FacialExpressionRecog(modelPath=args.model,
+                                       backendId=args.backend,
+                                       targetId=args.target)
+
+     # If input is an image
+     if args.input is not None:
+         image = cv.imread(args.input)
+
+         # Get detection and fer results
+         status, dets, fer_res = process(detect_model, fer_model, image)
+
+         if status:
+             # Draw results on the input image
+             image = visualize(image, dets, fer_res)
+
+             # Save results
+             if args.save:
+                 cv.imwrite('result.jpg', image)
+                 print('Results saved to result.jpg\n')
+
+             # Visualize results in a new window
+             if args.vis:
+                 cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+                 cv.imshow(args.input, image)
+                 cv.waitKey(0)
+     else:  # Omit input to call default camera
+         deviceId = 0
+         cap = cv.VideoCapture(deviceId)
+
+         while cv.waitKey(1) < 0:
+             hasFrame, frame = cap.read()
+             if not hasFrame:
+                 print('No frames grabbed!')
+                 break
+
+             # Get detection and fer results
+             status, dets, fer_res = process(detect_model, fer_model, frame)
+
+             if status:
+                 # Draw results on the input image
+                 frame = visualize(frame, dets, fer_res)
+
+             # Visualize results in a new window
+             cv.imshow('FER Demo', frame)
models/facial_expression_recognition/facial_fer_model.py ADDED
@@ -0,0 +1,178 @@
+ # This file is part of OpenCV Zoo project.
+ # It is subject to the license terms in the LICENSE file found in the same directory.
+ #
+ # Copyright (C) 2022, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+ # Third party copyrights are property of their respective owners.
+
+ import numpy as np
+ import cv2 as cv
+
+ class FacialExpressionRecog:
+     def __init__(self, modelPath, backendId=0, targetId=0):
+         self._modelPath = modelPath
+         self._backendId = backendId
+         self._targetId = targetId
+
+         self._model = cv.dnn.readNet(self._modelPath)
+         self._model.setPreferableBackend(self._backendId)
+         self._model.setPreferableTarget(self._targetId)
+
+         self._align_model = FaceAlignment()
+
+         self._inputNames = 'data'
+         self._outputNames = ['label']
+         self._inputSize = [112, 112]
+         self._mean = np.array([0.5, 0.5, 0.5])[np.newaxis, np.newaxis, :]
+         self._std = np.array([0.5, 0.5, 0.5])[np.newaxis, np.newaxis, :]
+
+     @property
+     def name(self):
+         return self.__class__.__name__
+
+     def setBackend(self, backend_id):
+         self._backendId = backend_id
+         self._model.setPreferableBackend(self._backendId)
+
+     def setTarget(self, target_id):
+         self._targetId = target_id
+         self._model.setPreferableTarget(self._targetId)
+
+     def _preprocess(self, image, bbox):
+         if bbox is not None:
+             image = self._align_model.get_align_image(image, bbox[4:].reshape(-1, 2))
+         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
+         image = image.astype(np.float32, copy=False) / 255.0
+         image -= self._mean
+         image /= self._std
+         return cv.dnn.blobFromImage(image)
+
+     def infer(self, image, bbox=None):
+         # Preprocess
+         inputBlob = self._preprocess(image, bbox)
+
+         # Forward
+         self._model.setInput(inputBlob, self._inputNames)
+         outputBlob = self._model.forward(self._outputNames)
+
+         # Postprocess
+         results = self._postprocess(outputBlob)
+
+         return results
+
+     def _postprocess(self, outputBlob):
+         result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
+         return result
+
+     @staticmethod
+     def getDesc(ind):
+         _expression_enum = ["angry", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]
+         return _expression_enum[ind]
+
+
+ class FaceAlignment():
+     def __init__(self, reflective=False):
+         self._std_points = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041]])
+         self.reflective = reflective
+
+     def __tformfwd(self, trans, uv):
+         uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
+         xy = np.dot(uv, trans)
+         xy = xy[:, 0:-1]
+         return xy
+
+     def __tforminv(self, trans, uv):
+         Tinv = np.linalg.inv(trans)
+         xy = self.__tformfwd(Tinv, uv)
+         return xy
+
+     def __findNonreflectiveSimilarity(self, uv, xy, options=None):
+         options = {"K": 2}
+
+         K = options["K"]
+         M = xy.shape[0]
+         x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
+         y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
+         # print '--->x, y:\n', x, y
+
+         tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
+         tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
+         X = np.vstack((tmp1, tmp2))
+         # print '--->X.shape: ', X.shape
+         # print 'X:\n', X
+
+         u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
+         v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
+         U = np.vstack((u, v))
+         # print '--->U.shape: ', U.shape
+         # print 'U:\n', U
+
+         # We know that X * r = U
+         if np.linalg.matrix_rank(X) >= 2 * K:
+             r, _, _, _ = np.linalg.lstsq(X, U, rcond=-1)
+             # print(r, X, U, sep="\n")
+             r = np.squeeze(r)
+         else:
+             raise Exception("cp2tform:twoUniquePointsReq")
+
+         sc = r[0]
+         ss = r[1]
+         tx = r[2]
+         ty = r[3]
+
+         Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
+         T = np.linalg.inv(Tinv)
+         T[:, 2] = np.array([0, 0, 1])
+
+         return T, Tinv
+
+     def __findSimilarity(self, uv, xy, options=None):
+         options = {"K": 2}
+
+         # uv = np.array(uv)
+         # xy = np.array(xy)
+
+         # Solve for trans1
+         trans1, trans1_inv = self.__findNonreflectiveSimilarity(uv, xy, options)
+
+         # manually reflect the xy data across the Y-axis
+         xyR = xy
+         xyR[:, 0] = -1 * xyR[:, 0]
+         # Solve for trans2
+         trans2r, trans2r_inv = self.__findNonreflectiveSimilarity(uv, xyR, options)
+
+         # manually reflect the tform to undo the reflection done on xyR
+         TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
+         trans2 = np.dot(trans2r, TreflectY)
+
+         # Figure out if trans1 or trans2 is better
+         xy1 = self.__tformfwd(trans1, uv)
+         norm1 = np.linalg.norm(xy1 - xy)
+         xy2 = self.__tformfwd(trans2, uv)
+         norm2 = np.linalg.norm(xy2 - xy)
+
+         if norm1 <= norm2:
+             return trans1, trans1_inv
+         else:
+             trans2_inv = np.linalg.inv(trans2)
+             return trans2, trans2_inv
+
+     def __get_similarity_transform(self, src_pts, dst_pts):
+         if self.reflective:
+             trans, trans_inv = self.__findSimilarity(src_pts, dst_pts)
+         else:
+             trans, trans_inv = self.__findNonreflectiveSimilarity(src_pts, dst_pts)
+         return trans, trans_inv
+
+     def __cvt_tform_mat_for_cv2(self, trans):
+         cv2_trans = trans[:, 0:2].T
+         return cv2_trans
+
+     def get_similarity_transform_for_cv2(self, src_pts, dst_pts):
+         trans, trans_inv = self.__get_similarity_transform(src_pts, dst_pts)
+         cv2_trans = self.__cvt_tform_mat_for_cv2(trans)
+         return cv2_trans, trans
+
+     def get_align_image(self, image, lm5_points):
+         assert lm5_points is not None
+         tfm, trans = self.get_similarity_transform_for_cv2(lm5_points, self._std_points)
+         return cv.warpAffine(image, tfm, (112, 112))
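
The alignment step above can also be exercised on its own: FaceAlignment fits a (non-reflective) similarity transform from the five detected landmarks to the fixed 112x112 template points and warps the face with cv.warpAffine. A small sketch, with placeholder image path and landmark coordinates:

```python
import numpy as np
import cv2 as cv

from facial_fer_model import FaceAlignment

image = cv.imread('/path/to/face_image.jpg')

# Five landmarks in the order used by YuNet: right eye, left eye, nose tip,
# right mouth corner, left mouth corner (placeholder coordinates).
lm5 = np.array([[120.0, 150.0],
                [180.0, 148.0],
                [152.0, 185.0],
                [128.0, 220.0],
                [176.0, 218.0]], dtype=np.float32)

aligned = FaceAlignment().get_align_image(image, lm5)
print(aligned.shape)  # (112, 112, 3) BGR crop, ready for FacialExpressionRecog
```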
tools/quantize/inc_configs/fer.yaml ADDED
@@ -0,0 +1,26 @@
+ version: 1.0
+
+ model:                                 # mandatory. used to specify model specific information.
+   name: fer
+   framework: onnxrt_qlinearops         # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.
+
+ quantization:                          # optional. tuning constraints on model-wise for advance user to reduce tuning space.
+   approach: post_training_static_quant # optional. default value is post_training_static_quant.
+   calibration:
+     dataloader:
+       batch_size: 1
+       dataset:
+         dummy:
+           shape: [1, 3, 112, 112]
+           low: -1.0
+           high: 1.0
+           dtype: float32
+           label: True
+
+ tuning:
+   accuracy_criterion:
+     relative: 0.01                     # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%.
+   exit_policy:
+     timeout: 0                         # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
+     max_trials: 50                     # optional. max tune times. default value is 100. combine with timeout field to decide when to exit.
+   random_seed: 9527                    # optional. random seed for deterministic tuning.
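
Note that the dummy calibration dataset declared above is only a fallback: the `fer` entry added to tools/quantize/quantize-inc.py (next) overrides it with a real FerDataset built from RAF calibration crops via `quantizer.calib_dataloader`, and also wires an evaluation dataset plus the Accuracy metric for accuracy-aware tuning. A rough sketch of what a single sample looks like under that preprocessing, assuming it is run in the context of the script below (FerDataset is defined there) and the data has been downloaded:

```python
# Sketch: FerDataset comes from tools/quantize/quantize-inc.py (shown below);
# the data path is a placeholder for the downloaded calibration images.
ds = FerDataset(root='../../benchmark/data/facial_expression_recognition/fer_calibration',
                size=(112, 112), toFP32=True, swapRB=True, scale=1./255, mean=0.5, std=0.5)

img, label = ds[0]
print(img.shape, label)  # (3, 112, 112) CHW tensor, roughly in [-1, 1]; label parsed from the file name
```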
tools/quantize/quantize-inc.py CHANGED
@@ -5,12 +5,39 @@ import cv2 as cv

  import onnx
  from neural_compressor.experimental import Quantization, common
+ from neural_compressor.experimental.metric import BaseMetric
+
+
+ class Accuracy(BaseMetric):
+     def __init__(self, *args):
+         self.pred_list = []
+         self.label_list = []
+         self.samples = 0
+
+     def update(self, predict, label):
+         predict = np.array(predict)
+         label = np.array(label)
+         self.pred_list.append(np.argmax(predict[0]))
+         self.label_list.append(label[0][0])
+         self.samples += 1
+
+     def reset(self):
+         self.pred_list = []
+         self.label_list = []
+         self.samples = 0
+
+     def result(self):
+         correct_num = np.sum(np.array(self.pred_list) == np.array(self.label_list))
+         return correct_num / self.samples
+

  class Quantize:
-     def __init__(self, model_path, config_path, custom_dataset=None):
+     def __init__(self, model_path, config_path, custom_dataset=None, eval_dataset=None, metric=None):
          self.model_path = model_path
          self.config_path = config_path
          self.custom_dataset = custom_dataset
+         self.eval_dataset = eval_dataset
+         self.metric = metric

      def run(self):
          print('Quantizing (int8) with Intel\'s Neural Compressor:')
@@ -21,31 +48,39 @@ class Quantize:

          model = onnx.load(self.model_path)
          quantizer = Quantization(self.config_path)
+         quantizer.model = common.Model(model)
          if self.custom_dataset is not None:
              quantizer.calib_dataloader = common.DataLoader(self.custom_dataset)
-         quantizer.model = common.Model(model)
+         if self.eval_dataset is not None:
+             quantizer.eval_dataloader = common.DataLoader(self.eval_dataset)
+         if self.metric is not None:
+             quantizer.metric = common.Metric(metric_cls=self.metric, name='metric')
          q_model = quantizer()
          q_model.save(output_name)

+
  class Dataset:
-     def __init__(self, root, size=None, dim='chw', mean=0.0, std=1.0, swapRB=False, toFP32=False):
+     def __init__(self, root, size=None, dim='chw', scale=1.0, mean=0.0, std=1.0, swapRB=False, toFP32=False):
          self.root = root
          self.size = size
          self.dim = dim
+         self.scale = scale
          self.mean = mean
          self.std = std
          self.swapRB = swapRB
          self.toFP32 = toFP32

-         self.image_list = self.load_image_list(self.root)
+         self.image_list, self.label_list = self.load_image_list(self.root)

      def load_image_list(self, path):
          image_list = []
+         label_list = []
          for f in os.listdir(path):
              if not f.endswith('.jpg'):
                  continue
              image_list.append(os.path.join(path, f))
-         return image_list
+             label_list.append(1)
+         return image_list, label_list

      def __getitem__(self, idx):
          img = cv.imread(self.image_list[idx])
@@ -59,18 +94,35 @@ class Dataset:
          if self.toFP32:
              img = img.astype(np.float32)

+         img = img * self.scale
          img = img - self.mean
          img = img / self.std

          if self.dim == 'chw':
-             img = img.transpose(2, 0, 1) # hwc -> chw
+             img = img.transpose(2, 0, 1) # hwc -> chw

-         return img, 1
+         return img, self.label_list[idx]

      def __len__(self):
          return len(self.image_list)

- models=dict(
+
+ class FerDataset(Dataset):
+     def __init__(self, root, size=None, dim='chw', scale=1.0, mean=0.0, std=1.0, swapRB=False, toFP32=False):
+         super(FerDataset, self).__init__(root, size, dim, scale, mean, std, swapRB, toFP32)
+
+     def load_image_list(self, path):
+         image_list = []
+         label_list = []
+         for f in os.listdir(path):
+             if not f.endswith('.jpg'):
+                 continue
+             image_list.append(os.path.join(path, f))
+             label_list.append(int(f.split("_")[2]))
+         return image_list, label_list
+
+
+ models = dict(
      mobilenetv1=Quantize(model_path='../../models/image_classification_mobilenet/image_classification_mobilenetv1_2022apr.onnx',
                           config_path='./inc_configs/mobilenet.yaml'),
      mobilenetv2=Quantize(model_path='../../models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr.onnx',
@@ -84,6 +136,11 @@ models=dict(
      lpd_yunet=Quantize(model_path='../../models/license_plate_detection_yunet/license_plate_detection_lpd_yunet_2022may.onnx',
                         config_path='./inc_configs/lpd_yunet.yaml',
                         custom_dataset=Dataset(root='../../benchmark/data/license_plate_detection', size=(320, 240), dim='chw', toFP32=True)),
+     fer=Quantize(model_path='../../models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx',
+                  config_path='./inc_configs/fer.yaml',
+                  custom_dataset=FerDataset(root='../../benchmark/data/facial_expression_recognition/fer_calibration', size=(112, 112), toFP32=True, swapRB=True, scale=1./255, mean=0.5, std=0.5),
+                  eval_dataset=FerDataset(root='../../benchmark/data/facial_expression_recognition/fer_evaluation', size=(112, 112), toFP32=True, swapRB=True, scale=1./255, mean=0.5, std=0.5),
+                  metric=Accuracy),
  )

  if __name__ == '__main__':
@@ -97,4 +154,3 @@ if __name__ == '__main__':
      for selected_model_name in selected_models:
          q = models[selected_model_name]
          q.run()
-
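
Assuming the script keeps its existing command-line behaviour of taking model names from the registry (so something like `python quantize-inc.py fer`, run from tools/quantize/ after downloading the FER data), the other convention worth calling out is how FerDataset derives labels: the third underscore-separated field of each RAF file name is treated as the class index, which matches the class order used by FacialExpressionRecog.getDesc.

```python
# Label convention assumed by FerDataset.load_image_list above.
fname = 'RAF_test_6_25.jpg'
label = int(fname.split('_')[2])
print(label)  # 6

# FacialExpressionRecog.getDesc maps an index to the model's class order:
# ["angry", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]
```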