Wanli committed commit 260eb6d (parent: ebeb80f)

Update palm detection model from MediaPipe (2023feb) (#128)

* update to the latest palm detection model
* quantize palm detection model
* align the palm to the center when resizing
* update benchmark for palm detection
* update benchmark data
* update readme
* update a new quantized model
* update readme
- README.md +1 -1
- benchmark/config/palm_detection_mediapipe.yaml +1 -1
- models/handpose_estimation_mediapipe/demo.py +1 -1
- models/palm_detection_mediapipe/README.md +6 -3
- models/palm_detection_mediapipe/demo.py +2 -2
- models/palm_detection_mediapipe/mp_palmdet.py +0 -0
- tools/quantize/README.md +4 -0
- tools/quantize/inc_configs/mp_palmdet.yaml +0 -52
- tools/quantize/quantize-inc.py +0 -3
- tools/quantize/quantize-ort.py +12 -4
- tools/quantize/requirements.txt +1 -1
README.md CHANGED

@@ -34,7 +34,7 @@ Guidelines:
 | [WeChatQRCode](./models/qrcode_wechatqrcode) | QR Code Detection and Parsing | 100x100 | 7.04 | 37.68 | --- | --- | --- |
 | [DaSiamRPN](./models/object_tracking_dasiamrpn) | Object Tracking | 1280x720 | 36.15 | 705.48 | 76.82 | --- | --- |
 | [YoutuReID](./models/person_reid_youtureid) | Person Re-Identification | 128x256 | 35.81 | 521.98 | 90.07 | 44.61 | --- |
-| [MP-PalmDet](./models/palm_detection_mediapipe) | Palm Detection |
+| [MP-PalmDet](./models/palm_detection_mediapipe) | Palm Detection | 192x192 | 11.09 | 63.79 | 83.20 | 33.81 | --- |
 | [MP-HandPose](./models/handpose_estimation_mediapipe) | Hand Pose Estimation | 256x256 | 20.16 | 148.24 | 156.30 | 42.70 | --- |
 
 \*: Models are quantized in per-channel mode, which run slower than per-tensor quantized models on NPU.
benchmark/config/palm_detection_mediapipe.yaml CHANGED

@@ -5,7 +5,7 @@ Benchmark:
   path: "data/palm_detection_20230125"
   files: ["palm1.jpg", "palm2.jpg", "palm3.jpg"]
   sizes: # [[w1, h1], ...], Omit to run at original scale
-    - [
+    - [192, 192]
   metric:
     warmup: 30
     repeat: 10
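The fixed 192x192 size matches the input resolution of the 2023feb palm detection model (see the README table change above). Assuming this config is consumed the same way as the other benchmark configs in this repository, the benchmark would be run with something like `python benchmark.py --cfg ./config/palm_detection_mediapipe.yaml` from the `benchmark/` directory; that exact command is an assumption, not part of this diff.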
models/handpose_estimation_mediapipe/demo.py CHANGED

@@ -91,7 +91,7 @@ def visualize(image, hands, print_result=False):
 
 if __name__ == '__main__':
     # palm detector
-    palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/
+    palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
                               nmsThreshold=0.3,
                               scoreThreshold=0.8,
                               backendId=args.backend,
models/palm_detection_mediapipe/README.md CHANGED

@@ -1,10 +1,11 @@
 # Palm detector from MediaPipe Handpose
 
-This model detects palm bounding boxes and palm landmarks, and is converted from
+This model detects palm bounding boxes and palm landmarks, and is converted from TFLite to ONNX using the following tools:
 
-- tf_saved_model to ONNX: https://github.com/onnx/tensorflow-onnx
+- TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
 - simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
+- SSD Anchors are generated from [GenMediaPipePalmDectionSSDAnchors](https://github.com/VimalMollyn/GenMediaPipePalmDectionSSDAnchors)
 
 ## Demo

@@ -31,3 +32,5 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 ## Reference
 
 - MediaPipe Handpose: https://github.com/tensorflow/tfjs-models/tree/master/handpose
+- MediaPipe hands model and model card: https://google.github.io/mediapipe/solutions/models.html#hands
+- Int8 model quantized with the RGB evaluation set of FreiHAND: https://lmb.informatik.uni-freiburg.de/resources/datasets/FreihandDataset.en.html
models/palm_detection_mediapipe/demo.py CHANGED

@@ -27,10 +27,10 @@ except:
 
 parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
 parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./
+parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx', help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--score_threshold', type=float, default=0.
+parser.add_argument('--score_threshold', type=float, default=0.8, help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
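Note that the 0.8 default above targets the float model; per the new help text, roughly 0.49 is the empirical score threshold for the quantized model. A run against the int8 variant would therefore presumably pass something like `--score_threshold 0.49` together with `-m` pointing at the quantized ONNX file (whose filename is not shown in this diff).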
models/palm_detection_mediapipe/mp_palmdet.py CHANGED

The diff for this file is too large to render; see the raw diff.
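The commit message's note about aligning the palm to the center when resizing refers to this file, whose diff is not rendered above. As a rough illustration only (not the repository's actual code, since that diff is unavailable here), center alignment during resize is commonly done by letterboxing: scale the image to fit the target size while keeping its aspect ratio, pad equally on both sides so the content stays centered, and remember the scale and offsets so detections can be mapped back.

```python
import cv2 as cv
import numpy as np

def resize_keep_aspect_centered(image, dst_size=(192, 192)):
    """Resize while keeping the aspect ratio, padding both sides equally so the
    content stays centered; return the scale/offsets needed to map detections back."""
    h, w = image.shape[:2]
    dst_w, dst_h = dst_size
    scale = min(dst_w / w, dst_h / h)                  # uniform scale that fits the whole image
    new_w, new_h = int(round(w * scale)), int(round(h * scale))
    resized = cv.resize(image, (new_w, new_h))
    pad_x = (dst_w - new_w) // 2                       # split padding evenly left/right
    pad_y = (dst_h - new_h) // 2                       # split padding evenly top/bottom
    canvas = np.zeros((dst_h, dst_w, 3), dtype=image.dtype)
    canvas[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = resized
    return canvas, scale, (pad_x, pad_y)

# Detections on the 192x192 input would then map back to the original image via:
#   x_orig = (x_192 - pad_x) / scale
#   y_orig = (y_192 - pad_y) / scale
```

All names and details in this sketch are assumptions; the committed implementation lives in mp_palmdet.py.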
tools/quantize/README.md CHANGED

@@ -51,3 +51,7 @@ models = dict(
 # 3. quantize your model
 python quantize-inc.py model1
 ```
+
+## Dataset
+Some models are quantized with extra datasets.
+- [MP-PalmDet](../../models/palm_detection_mediapipe): the int8 model is quantized with the evaluation set of [FreiHAND](https://lmb.informatik.uni-freiburg.de/resources/datasets/FreihandDataset.en.html). Download the dataset from [this link](https://lmb.informatik.uni-freiburg.de/data/freihand/FreiHAND_pub_v2_eval.zip), unpack it, and use the path `FreiHAND_pub_v2_eval/evaluation/rgb`.
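After unpacking FreiHAND, the `calibration_image_dir='path/to/dataset'` placeholder in the `mp_palmdet` entry of `quantize-ort.py` (see its diff further below) is meant to be replaced with the local path to `FreiHAND_pub_v2_eval/evaluation/rgb`; the exact location on disk is up to the user and is not fixed by this commit.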
tools/quantize/inc_configs/mp_palmdet.yaml DELETED

@@ -1,52 +0,0 @@
-#
-# Copyright (c) 2021 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-version: 1.0
-
-model:                                   # mandatory. used to specify model specific information.
-  name: mp_palmdet
-  framework: onnxrt_qlinearops           # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.
-
-quantization:                            # optional. tuning constraints on model-wise for advance user to reduce tuning space.
-  approach: post_training_static_quant   # optional. default value is post_training_static_quant.
-  calibration:
-    dataloader:
-      batch_size: 1
-      dataset:
-        dummy:
-          shape: [1, 256, 256, 3]
-          low: -1.0
-          high: 1.0
-          dtype: float32
-          label: True
-
-  model_wise:                            # optional. tuning constraints on model-wise for advance user to reduce tuning space.
-    weight:
-      granularity: per_tensor
-      scheme: asym
-      dtype: int8
-      algorithm: minmax
-    activation:
-      granularity: per_tensor
-      scheme: asym
-      dtype: int8
-      algorithm: minmax
-
-tuning:
-  accuracy_criterion:
-    relative: 0.02                       # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%.
-  exit_policy:
-    timeout: 0                           # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
-  random_seed: 9527                      # optional. random seed for deterministic tuning.
tools/quantize/quantize-inc.py CHANGED

@@ -127,9 +127,6 @@ models = dict(
                        config_path='./inc_configs/mobilenet.yaml'),
     mobilenetv2=Quantize(model_path='../../models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr.onnx',
                          config_path='./inc_configs/mobilenet.yaml'),
-    mp_palmdet=Quantize(model_path='../../models/palm_detection_mediapipe/palm_detection_mediapipe_2022may.onnx',
-                        config_path='./inc_configs/mp_palmdet.yaml',
-                        custom_dataset=Dataset(root='../../benchmark/data/palm_detection', dim='hwc', swapRB=True, mean=127.5, std=127.5, toFP32=True)),
     mp_handpose=Quantize(model_path='../../models/handpose_estimation_mediapipe/handpose_estimation_mediapipe_2022may.onnx',
                          config_path='./inc_configs/mp_handpose.yaml',
                          custom_dataset=Dataset(root='../../benchmark/data/palm_detection', dim='hwc', swapRB=True, mean=127.5, std=127.5, toFP32=True)),
tools/quantize/quantize-ort.py CHANGED

@@ -17,10 +17,11 @@ from onnxruntime.quantization import quantize_static, CalibrationDataReader, Qua
 from transform import Compose, Resize, CenterCrop, Normalize, ColorConvert
 
 class DataReader(CalibrationDataReader):
-    def __init__(self, model_path, image_dir, transforms):
+    def __init__(self, model_path, image_dir, transforms, data_dim):
         model = onnx.load(model_path)
         self.input_name = model.graph.input[0].name
         self.transforms = transforms
+        self.data_dim = data_dim
         self.data = self.get_calibration_data(image_dir)
         self.enum_data_dicts = iter([{self.input_name: x} for x in self.data])
 
@@ -37,11 +38,13 @@ class DataReader(CalibrationDataReader):
             img = cv.imread(os.path.join(image_dir, image_name))
             img = self.transforms(img)
             blob = cv.dnn.blobFromImage(img)
+            if self.data_dim == 'hwc':
+                blob = cv.transposeND(blob, [0, 2, 3, 1])
             blobs.append(blob)
         return blobs
 
 class Quantize:
-    def __init__(self, model_path, calibration_image_dir, transforms=Compose(), per_channel=False, act_type='int8', wt_type='int8'):
+    def __init__(self, model_path, calibration_image_dir, transforms=Compose(), per_channel=False, act_type='int8', wt_type='int8', data_dim='chw'):
         self.type_dict = {"uint8" : QuantType.QUInt8, "int8" : QuantType.QInt8}
 
         self.model_path = model_path
@@ -52,7 +55,7 @@ class Quantize:
         self.wt_type = wt_type
 
         # data reader
-        self.dr = DataReader(self.model_path, self.calibration_image_dir, self.transforms)
+        self.dr = DataReader(self.model_path, self.calibration_image_dir, self.transforms, data_dim)
 
     def check_opset(self, convert=True):
         model = onnx.load(self.model_path)
@@ -102,7 +105,12 @@ models=dict(
                      transforms=Compose([Resize(size=(100, 32)), Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]), ColorConvert(ctype=cv.COLOR_BGR2GRAY)])),
     crnn_cn=Quantize(model_path='../../models/text_recognition_crnn/text_recognition_CRNN_CN_2021nov.onnx',
                      calibration_image_dir='../../benchmark/data/text',
-                     transforms=Compose([Resize(size=(100, 32))]))
+                     transforms=Compose([Resize(size=(100, 32))])),
+    mp_palmdet=Quantize(model_path='../../models/palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
+                        calibration_image_dir='path/to/dataset',
+                        transforms=Compose([Resize(size=(192, 192)), Normalize(std=[255, 255, 255]),
+                                            ColorConvert(ctype=cv.COLOR_BGR2RGB)]), data_dim='hwc'),
+
 )
 
 if __name__ == '__main__':
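The new `data_dim='hwc'` option exists because `cv.dnn.blobFromImage` always produces NCHW blobs, while the TFLite-derived palm detection model takes NHWC input, so the calibration blob has to be transposed before it is fed to the model. A minimal standalone sketch of what the added branch does (using a dummy image instead of a real calibration file):

```python
import cv2 as cv
import numpy as np

# Stand-in for one calibration image; quantize-ort.py reads real images from calibration_image_dir.
img = np.random.randint(0, 256, (192, 192, 3), dtype=np.uint8)

blob = cv.dnn.blobFromImage(img)                # NCHW blob of shape (1, 3, 192, 192)
blob_nhwc = cv.transposeND(blob, [0, 2, 3, 1])  # NHWC blob of shape (1, 192, 192, 3)
print(blob.shape, blob_nhwc.shape)
```

`cv.transposeND` is only available in newer OpenCV releases, which is presumably part of why requirements.txt (below) now pins opencv-python>=4.7.0.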
tools/quantize/requirements.txt CHANGED

@@ -1,4 +1,4 @@
-opencv-python>=4.
+opencv-python>=4.7.0
 onnx
 onnxruntime
 onnxruntime-extensions