Wanli committed commit 260eb6d (parent: ebeb80f)

Update palm detection model from MediaPipe (2023feb) (#128)

* update to the latest palm detection model
* quantize palm detection model
* align the palm to the center when resizing
* update benchmark for palm detection
* update benchmark data
* update readme
* update a new quantized model
* update readme
- README.md +1 -1
- benchmark/config/palm_detection_mediapipe.yaml +1 -1
- models/handpose_estimation_mediapipe/demo.py +1 -1
- models/palm_detection_mediapipe/README.md +6 -3
- models/palm_detection_mediapipe/demo.py +2 -2
- models/palm_detection_mediapipe/mp_palmdet.py +0 -0
- tools/quantize/README.md +4 -0
- tools/quantize/inc_configs/mp_palmdet.yaml +0 -52
- tools/quantize/quantize-inc.py +0 -3
- tools/quantize/quantize-ort.py +12 -4
- tools/quantize/requirements.txt +1 -1
README.md CHANGED

@@ -34,7 +34,7 @@ Guidelines:
 | [WeChatQRCode](./models/qrcode_wechatqrcode) | QR Code Detection and Parsing | 100x100 | 7.04 | 37.68 | --- | --- | --- |
 | [DaSiamRPN](./models/object_tracking_dasiamrpn) | Object Tracking | 1280x720 | 36.15 | 705.48 | 76.82 | --- | --- |
 | [YoutuReID](./models/person_reid_youtureid) | Person Re-Identification | 128x256 | 35.81 | 521.98 | 90.07 | 44.61 | --- |
-| [MP-PalmDet](./models/palm_detection_mediapipe) | Palm Detection |
+| [MP-PalmDet](./models/palm_detection_mediapipe) | Palm Detection | 192x192 | 11.09 | 63.79 | 83.20 | 33.81 | --- |
 | [MP-HandPose](./models/handpose_estimation_mediapipe) | Hand Pose Estimation | 256x256 | 20.16 | 148.24 | 156.30 | 42.70 | --- |
 
 \*: Models are quantized in per-channel mode, which run slower than per-tensor quantized models on NPU.
benchmark/config/palm_detection_mediapipe.yaml CHANGED

@@ -5,7 +5,7 @@ Benchmark:
   path: "data/palm_detection_20230125"
   files: ["palm1.jpg", "palm2.jpg", "palm3.jpg"]
   sizes: # [[w1, h1], ...], Omit to run at original scale
-    - [
+    - [192, 192]
   metric:
     warmup: 30
     repeat: 10
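The fixed 192x192 size matches the input resolution of the 2023feb palm detection model (see the README table change above). Assuming this config is consumed the same way as the other benchmark configs in this repository, the benchmark would be run with something like `python benchmark.py --cfg ./config/palm_detection_mediapipe.yaml` from the `benchmark/` directory; that exact command is an assumption, not part of this diff.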
models/handpose_estimation_mediapipe/demo.py CHANGED

@@ -91,7 +91,7 @@ def visualize(image, hands, print_result=False):
 
 if __name__ == '__main__':
     # palm detector
-    palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/
+    palm_detector = MPPalmDet(modelPath='../palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
                               nmsThreshold=0.3,
                               scoreThreshold=0.8,
                               backendId=args.backend,
models/palm_detection_mediapipe/README.md CHANGED

@@ -1,10 +1,11 @@
 # Palm detector from MediaPipe Handpose
 
-This model detects palm bounding boxes and palm landmarks, and is converted from
+This model detects palm bounding boxes and palm landmarks, and is converted from TFLite to ONNX using the following tools:
 
-- tf_saved_model to ONNX: https://github.com/onnx/tensorflow-onnx
+- TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
 - simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
+- SSD Anchors are generated from [GenMediaPipePalmDectionSSDAnchors](https://github.com/VimalMollyn/GenMediaPipePalmDectionSSDAnchors)
 
 ## Demo

@@ -31,3 +32,5 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 ## Reference
 
 - MediaPipe Handpose: https://github.com/tensorflow/tfjs-models/tree/master/handpose
+- MediaPipe hands model and model card: https://google.github.io/mediapipe/solutions/models.html#hands
+- Int8 model quantized with the RGB evaluation set of FreiHAND: https://lmb.informatik.uni-freiburg.de/resources/datasets/FreihandDataset.en.html
models/palm_detection_mediapipe/demo.py CHANGED

@@ -27,10 +27,10 @@ except:
 
 parser = argparse.ArgumentParser(description='Hand Detector from MediaPipe')
 parser.add_argument('--input', '-i', type=str, help='Usage: Set path to the input image. Omit for using default camera.')
-parser.add_argument('--model', '-m', type=str, default='./
+parser.add_argument('--model', '-m', type=str, default='./palm_detection_mediapipe_2023feb.onnx', help='Usage: Set model path, defaults to palm_detection_mediapipe_2023feb.onnx.')
 parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
 parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
-parser.add_argument('--score_threshold', type=float, default=0.
+parser.add_argument('--score_threshold', type=float, default=0.8, help='Usage: Set the minimum needed confidence for the model to identify a palm, defaults to 0.8. Smaller values may result in faster detection, but will limit accuracy. Filter out faces of confidence < conf_threshold. An empirical score threshold for the quantized model is 0.49.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Usage: Suppress bounding boxes of iou >= nms_threshold. Default = 0.3.')
 parser.add_argument('--save', '-s', type=str, default=False, help='Usage: Set “True” to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input. Default will be set to “False”.')
 parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Usage: Default will be set to “True” and will open a new window to show results. Set to “False” to stop visualizations from being shown. Invalid in case of camera input.')
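Note that the 0.8 default above targets the float model; per the new help text, roughly 0.49 is the empirical score threshold for the quantized model. A run against the int8 variant would therefore presumably pass something like `--score_threshold 0.49` together with `-m` pointing at the quantized ONNX file (whose filename is not shown in this diff).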
models/palm_detection_mediapipe/mp_palmdet.py CHANGED

The diff for this file is too large to render; see the raw diff.
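The commit message's note about aligning the palm to the center when resizing refers to this file, whose diff is not rendered above. As a rough illustration only (not the repository's actual code, since that diff is unavailable here), center alignment during resize is commonly done by letterboxing: scale the image to fit the target size while keeping its aspect ratio, pad equally on both sides so the content stays centered, and remember the scale and offsets so detections can be mapped back.

```python
import cv2 as cv
import numpy as np

def resize_keep_aspect_centered(image, dst_size=(192, 192)):
    """Resize while keeping the aspect ratio, padding both sides equally so the
    content stays centered; return the scale/offsets needed to map detections back."""
    h, w = image.shape[:2]
    dst_w, dst_h = dst_size
    scale = min(dst_w / w, dst_h / h)                  # uniform scale that fits the whole image
    new_w, new_h = int(round(w * scale)), int(round(h * scale))
    resized = cv.resize(image, (new_w, new_h))
    pad_x = (dst_w - new_w) // 2                       # split padding evenly left/right
    pad_y = (dst_h - new_h) // 2                       # split padding evenly top/bottom
    canvas = np.zeros((dst_h, dst_w, 3), dtype=image.dtype)
    canvas[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = resized
    return canvas, scale, (pad_x, pad_y)

# Detections on the 192x192 input would then map back to the original image via:
#   x_orig = (x_192 - pad_x) / scale
#   y_orig = (y_192 - pad_y) / scale
```

All names and details in this sketch are assumptions; the committed implementation lives in mp_palmdet.py.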
tools/quantize/README.md CHANGED

@@ -51,3 +51,7 @@ models = dict(
 # 3. quantize your model
 python quantize-inc.py model1
 ```
+
+## Dataset
+Some models are quantized with extra datasets.
+- [MP-PalmDet](../../models/palm_detection_mediapipe): the int8 model is quantized with the evaluation set of [FreiHAND](https://lmb.informatik.uni-freiburg.de/resources/datasets/FreihandDataset.en.html). Download the dataset from [this link](https://lmb.informatik.uni-freiburg.de/data/freihand/FreiHAND_pub_v2_eval.zip), unpack it, and use the path `FreiHAND_pub_v2_eval/evaluation/rgb`.
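After unpacking FreiHAND, the `calibration_image_dir='path/to/dataset'` placeholder in the `mp_palmdet` entry of `quantize-ort.py` (see its diff further below) is meant to be replaced with the local path to `FreiHAND_pub_v2_eval/evaluation/rgb`; the exact location on disk is up to the user and is not fixed by this commit.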
tools/quantize/inc_configs/mp_palmdet.yaml DELETED

@@ -1,52 +0,0 @@
-#
-# Copyright (c) 2021 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-version: 1.0
-
-model:                                   # mandatory. used to specify model specific information.
-  name: mp_palmdet
-  framework: onnxrt_qlinearops           # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.
-
-quantization:                            # optional. tuning constraints on model-wise for advance user to reduce tuning space.
-  approach: post_training_static_quant   # optional. default value is post_training_static_quant.
-  calibration:
-    dataloader:
-      batch_size: 1
-      dataset:
-        dummy:
-          shape: [1, 256, 256, 3]
-          low: -1.0
-          high: 1.0
-          dtype: float32
-          label: True
-
-  model_wise:                            # optional. tuning constraints on model-wise for advance user to reduce tuning space.
-    weight:
-      granularity: per_tensor
-      scheme: asym
-      dtype: int8
-      algorithm: minmax
-    activation:
-      granularity: per_tensor
-      scheme: asym
-      dtype: int8
-      algorithm: minmax
-
-tuning:
-  accuracy_criterion:
-    relative: 0.02                       # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%.
-  exit_policy:
-    timeout: 0                           # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
-  random_seed: 9527                      # optional. random seed for deterministic tuning.
tools/quantize/quantize-inc.py CHANGED

@@ -127,9 +127,6 @@ models = dict(
                        config_path='./inc_configs/mobilenet.yaml'),
     mobilenetv2=Quantize(model_path='../../models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr.onnx',
                          config_path='./inc_configs/mobilenet.yaml'),
-    mp_palmdet=Quantize(model_path='../../models/palm_detection_mediapipe/palm_detection_mediapipe_2022may.onnx',
-                        config_path='./inc_configs/mp_palmdet.yaml',
-                        custom_dataset=Dataset(root='../../benchmark/data/palm_detection', dim='hwc', swapRB=True, mean=127.5, std=127.5, toFP32=True)),
     mp_handpose=Quantize(model_path='../../models/handpose_estimation_mediapipe/handpose_estimation_mediapipe_2022may.onnx',
                          config_path='./inc_configs/mp_handpose.yaml',
                          custom_dataset=Dataset(root='../../benchmark/data/palm_detection', dim='hwc', swapRB=True, mean=127.5, std=127.5, toFP32=True)),
tools/quantize/quantize-ort.py CHANGED

@@ -17,10 +17,11 @@ from onnxruntime.quantization import quantize_static, CalibrationDataReader, Qua
 from transform import Compose, Resize, CenterCrop, Normalize, ColorConvert
 
 class DataReader(CalibrationDataReader):
-    def __init__(self, model_path, image_dir, transforms):
+    def __init__(self, model_path, image_dir, transforms, data_dim):
         model = onnx.load(model_path)
         self.input_name = model.graph.input[0].name
         self.transforms = transforms
+        self.data_dim = data_dim
         self.data = self.get_calibration_data(image_dir)
         self.enum_data_dicts = iter([{self.input_name: x} for x in self.data])
 
@@ -37,11 +38,13 @@ class DataReader(CalibrationDataReader):
             img = cv.imread(os.path.join(image_dir, image_name))
             img = self.transforms(img)
             blob = cv.dnn.blobFromImage(img)
+            if self.data_dim == 'hwc':
+                blob = cv.transposeND(blob, [0, 2, 3, 1])
             blobs.append(blob)
         return blobs
 
 class Quantize:
-    def __init__(self, model_path, calibration_image_dir, transforms=Compose(), per_channel=False, act_type='int8', wt_type='int8'):
+    def __init__(self, model_path, calibration_image_dir, transforms=Compose(), per_channel=False, act_type='int8', wt_type='int8', data_dim='chw'):
         self.type_dict = {"uint8" : QuantType.QUInt8, "int8" : QuantType.QInt8}
 
         self.model_path = model_path
@@ -52,7 +55,7 @@ class Quantize:
         self.wt_type = wt_type
 
         # data reader
-        self.dr = DataReader(self.model_path, self.calibration_image_dir, self.transforms)
+        self.dr = DataReader(self.model_path, self.calibration_image_dir, self.transforms, data_dim)
 
     def check_opset(self, convert=True):
         model = onnx.load(self.model_path)
@@ -102,7 +105,12 @@ models=dict(
                      transforms=Compose([Resize(size=(100, 32)), Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]), ColorConvert(ctype=cv.COLOR_BGR2GRAY)])),
     crnn_cn=Quantize(model_path='../../models/text_recognition_crnn/text_recognition_CRNN_CN_2021nov.onnx',
                      calibration_image_dir='../../benchmark/data/text',
-                     transforms=Compose([Resize(size=(100, 32))]))
+                     transforms=Compose([Resize(size=(100, 32))])),
+    mp_palmdet=Quantize(model_path='../../models/palm_detection_mediapipe/palm_detection_mediapipe_2023feb.onnx',
+                        calibration_image_dir='path/to/dataset',
+                        transforms=Compose([Resize(size=(192, 192)), Normalize(std=[255, 255, 255]),
+                                            ColorConvert(ctype=cv.COLOR_BGR2RGB)]), data_dim='hwc'),
+
 )
 
 if __name__ == '__main__':
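The new `data_dim='hwc'` option exists because `cv.dnn.blobFromImage` always produces NCHW blobs, while the TFLite-derived palm detection model takes NHWC input, so the calibration blob has to be transposed before it is fed to the model. A minimal standalone sketch of what the added branch does (using a dummy image instead of a real calibration file):

```python
import cv2 as cv
import numpy as np

# Stand-in for one calibration image; quantize-ort.py reads real images from calibration_image_dir.
img = np.random.randint(0, 256, (192, 192, 3), dtype=np.uint8)

blob = cv.dnn.blobFromImage(img)                # NCHW blob of shape (1, 3, 192, 192)
blob_nhwc = cv.transposeND(blob, [0, 2, 3, 1])  # NHWC blob of shape (1, 192, 192, 3)
print(blob.shape, blob_nhwc.shape)
```

`cv.transposeND` is only available in newer OpenCV releases, which is presumably part of why requirements.txt (below) now pins opencv-python>=4.7.0.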
tools/quantize/requirements.txt CHANGED

@@ -1,4 +1,4 @@
-opencv-python>=4.
+opencv-python>=4.7.0
 onnx
 onnxruntime
 onnxruntime-extensions