[GSoC] Add block quantized models (#270)

* Gemm and MatMul block quantization support
* Refactoring
* Fix indentation
* Node-name independent
* Block quantization tool:
  - constant weight category supported
  - add data type saturation
  - handle the case in which all the elements within a block are the same
  - benchmark script modified to support block-quantized models
  - block quantized some models
* Add missing block-quantized models
* Formatting
* Add blocked models to eval script; evaluation of YuNet
* Add SFace and PPHumanSeg evaluation, block quantization tool fix, handpose blocked model fix, removed blocked CRNN EN
* Changed evaluation metric in block_quantize script and added verbose mode
* Add evaluation for PP-ResNet and MobileNet
* Changed file suffix and updated READMEs
* Renamed to int8bq

Files changed:

- benchmark/README.md +1 -1
- benchmark/benchmark.py +5 -2
- models/__init__.py +4 -0
- models/face_detection_yunet/README.md +7 -2
- models/face_recognition_sface/README.md +3 -0
- models/facial_expression_recognition/README.md +1 -0
- models/handpose_estimation_mediapipe/README.md +1 -0
- models/human_segmentation_pphumanseg/README.md +7 -2
- models/image_classification_mobilenet/README.md +6 -0
- models/image_classification_ppresnet/README.md +5 -0
- models/license_plate_detection_yunet/README.md +3 -0
- models/object_detection_nanodet/README.md +3 -1
- models/object_detection_yolox/README.md +3 -1
- models/object_tracking_vittrack/README.md +4 -1
- models/optical_flow_estimation_raft/README.md +2 -0
- models/palm_detection_mediapipe/README.md +1 -0
- models/person_detection_mediapipe/README.md +3 -0
- models/person_reid_youtureid/README.md +2 -2
- models/pose_estimation_mediapipe/README.md +2 -0
- models/text_detection_ppocr/README.md +2 -1
- models/text_recognition_crnn/README.md +2 -1
- tools/eval/README.md +15 -1
- tools/eval/eval.py +33 -0
- tools/quantize/README.md +2 -0
- tools/quantize/block_quantize.py +111 -37
benchmark/README.md CHANGED

```diff
@@ -26,7 +26,7 @@ python benchmark.py --cfg ./config/face_detection_yunet.yaml
 # All configs
 python benchmark.py --all
 
-# All configs but only fp32 models (--fp32, --fp16, --int8 are available for now)
+# All configs but only fp32 models (--fp32, --fp16, --int8 --int8bq are available for now)
 python benchmark.py --all --fp32
 
 # All configs but exclude some of them (fill with config name keywords, not sensitive to upper/lower case, seperate with colons)
```

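With the new flag documented above, block-quantized models can be benchmarked on their own in the same way `--fp32` restricts the run to float32 models, e.g. `python benchmark.py --all --int8bq`.
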
benchmark/benchmark.py CHANGED

```diff
@@ -46,6 +46,7 @@ parser.add_argument("--model_exclude", type=str, help="Models to be excluded. Sp
 parser.add_argument("--fp32", action="store_true", help="Benchmark models of float32 precision only.")
 parser.add_argument("--fp16", action="store_true", help="Benchmark models of float16 precision only.")
 parser.add_argument("--int8", action="store_true", help="Benchmark models of int8 precision only.")
+parser.add_argument("--int8bq", action="store_true", help="Benchmark models of blocked int8 precision only.")
 parser.add_argument("--all", action="store_true", help="Benchmark all models")
 args = parser.parse_args()
 
@@ -194,15 +195,17 @@ if __name__ == '__main__':
         model_handler, model_paths = MODELS.get(model_config.pop('name'))
 
         _model_paths = []
-        if args.fp32 or args.fp16 or args.int8:
+        if args.fp32 or args.fp16 or args.int8 or args.int8bq:
             if args.fp32:
                 _model_paths += model_paths['fp32']
             if args.fp16:
                 _model_paths += model_paths['fp16']
             if args.int8:
                 _model_paths += model_paths['int8']
+            if args.int8bq:
+                _model_paths += model_paths['int8bq']
         else:
-            _model_paths = model_paths['fp32'] + model_paths['fp16'] + model_paths['int8']
+            _model_paths = model_paths['fp32'] + model_paths['fp16'] + model_paths['int8'] + model_paths["int8bq"]
         # filter out excluded models
         excludes = []
         if args.model_exclude is not None:
```

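The selection logic above is easy to misread in diff form. The following self-contained sketch (toy data and function names, not the benchmark script itself) shows how the precision flags map to the registry buckets, including the new `int8bq` one:

```python
# Illustrative sketch of the flag-to-bucket selection added above (toy data, not the real registry).
model_paths = {
    "fp32":   [["model.onnx"]],
    "fp16":   [["model_fp16.onnx"]],
    "int8":   [["model_int8.onnx"]],
    "int8bq": [["model_int8bq.onnx"]],  # block-quantized bucket introduced by this PR
}

def select(paths, fp32=False, fp16=False, int8=False, int8bq=False):
    selected = []
    if fp32 or fp16 or int8 or int8bq:
        if fp32:
            selected += paths["fp32"]
        if fp16:
            selected += paths["fp16"]
        if int8:
            selected += paths["int8"]
        if int8bq:
            selected += paths["int8bq"]
    else:
        # no precision flag given: benchmark every variant, block-quantized included
        selected = paths["fp32"] + paths["fp16"] + paths["int8"] + paths["int8bq"]
    return selected

print(select(model_paths, int8bq=True))   # [['model_int8bq.onnx']]
print(len(select(model_paths)))           # 4
```
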
models/__init__.py CHANGED

```diff
@@ -46,6 +46,7 @@ class ModuleRegistery:
         fp32_model_paths = []
         fp16_model_paths = []
         int8_model_paths = []
+        int8bq_model_paths = []
         # onnx
         ret_onnx = sorted(glob.glob(os.path.join(model_dir, "*.onnx")))
         if "object_tracking" in item.__module__:
@@ -57,6 +58,8 @@ class ModuleRegistery:
                 int8_model_paths.append([r])
             elif "fp16" in r: # exclude fp16 for now
                 fp16_model_paths.append([r])
+            elif "blocked" in r:
+                int8bq_model_paths.append([r])
             else:
                 fp32_model_paths.append([r])
         # caffe
@@ -72,6 +75,7 @@ class ModuleRegistery:
             fp32=fp32_model_paths,
             fp16=fp16_model_paths,
             int8=int8_model_paths,
+            int8bq=int8bq_model_paths
         )
 
         self._dict[item.__name__] = (item, all_model_paths)
```

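For readers unfamiliar with `ModuleRegistery`, the sketch below is a simplified, self-contained stand-in for its filename-based bucketing (the first branch of the real `elif` chain is not visible in this hunk, so the substring checks and their ordering here are assumptions):

```python
# Simplified stand-in for the registry's ONNX bucketing; not the repo code.
# The "fp16"/"blocked" substring checks mirror the hunk above; the int8 check
# and the ordering are assumptions for illustration only.
import glob
import os

def collect_onnx_buckets(model_dir: str) -> dict:
    buckets = dict(fp32=[], fp16=[], int8=[], int8bq=[])
    for r in sorted(glob.glob(os.path.join(model_dir, "*.onnx"))):
        name = os.path.basename(r)
        if "blocked" in name or "int8bq" in name:   # block-quantized variants
            buckets["int8bq"].append([r])
        elif "int8" in name:
            buckets["int8"].append([r])
        elif "fp16" in name:                        # excluded from benchmarks for now
            buckets["fp16"].append([r])
        else:
            buckets["fp32"].append([r])
    return buckets

print(collect_onnx_buckets("models/face_detection_yunet"))
```
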
models/face_detection_yunet/README.md CHANGED

```diff
@@ -8,15 +8,20 @@ Notes:
 - This model can detect **faces of pixels between around 10x10 to 300x300** due to the training scheme.
 - For details on training this model, please visit https://github.com/ShiqiYu/libfacedetection.train.
 - This ONNX model has fixed input shape, but OpenCV DNN infers on the exact shape of input image. See https://github.com/opencv/opencv_zoo/issues/44 for more information.
+- `face_detection_yunet_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 | Models | Easy AP | Medium AP | Hard AP |
 | ----------- | ------- | --------- | ------- |
-| YuNet | 0.…
-| YuNet …
+| YuNet | 0.8844 | 0.8656 | 0.7503 |
+| YuNet block | 0.8845 | 0.8652 | 0.7504 |
+| YuNet quant | 0.8810 | 0.8629 | 0.7503 |
+
 
 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
+
 
 ## Demo
 
```

models/face_recognition_sface/README.md CHANGED

```diff
@@ -8,15 +8,18 @@ Note:
 - Model files encode MobileFaceNet instances trained on the SFace loss function, see the [SFace paper](https://arxiv.org/abs/2205.12010) for reference.
 - ONNX file conversions from [original code base](https://github.com/zhongyy/SFace) thanks to [Chengrui Wang](https://github.com/crywang).
 - (As of Sep 2021) Supporting 5-landmark warping for now, see below for details.
+- `face_recognition_sface_2021dec_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 | Models | Accuracy |
 | ----------- | -------- |
 | SFace | 0.9940 |
+| SFace block | 0.9942 |
 | SFace quant | 0.9932 |
 
 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
 
 ## Demo
 
```

models/facial_expression_recognition/README.md CHANGED

```diff
@@ -7,6 +7,7 @@ Note:
 - Progressive Teacher is contributed by [Jing Jiang](https://scholar.google.com/citations?user=OCwcfAwAAAAJ&hl=zh-CN).
 - [MobileFaceNet](https://link.springer.com/chapter/10.1007/978-3-319-97909-0_46) is used as the backbone and the model is able to classify seven basic facial expressions (angry, disgust, fearful, happy, neutral, sad, surprised).
 - [facial_expression_recognition_mobilefacenet_2022july.onnx](https://github.com/opencv/opencv_zoo/raw/master/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx) is implemented thanks to [Chengrui Wang](https://github.com/crywang).
+- `facial_expression_recognition_mobilefacenet_2022july_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html).
 
```

models/handpose_estimation_mediapipe/README.md CHANGED

```diff
@@ -14,6 +14,7 @@ This model is converted from TFlite to ONNX using following tools:
 **Note**:
 - The int8-quantized model may produce invalid results due to a significant drop of accuracy.
 - Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#hands for models of larger scale.
+- `handpose_estimation_mediapipe_2023feb_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 ## Demo
 
```

models/human_segmentation_pphumanseg/README.md CHANGED

```diff
@@ -2,6 +2,9 @@
 
 This model is ported from [PaddleHub](https://github.com/PaddlePaddle/PaddleHub) using [this script from OpenCV](https://github.com/opencv/opencv/blob/master/samples/dnn/dnn_model_runner/dnn_conversion/paddlepaddle/paddle_humanseg.py).
 
+**Note**:
+- `human_segmentation_pphumanseg_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo
 
 ### Python
@@ -46,11 +49,13 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 | Models | Accuracy | mIoU |
 | ------------------ | -------------- | ------------- |
-| PPHumanSeg | 0.…
-| PPHumanSeg …
+| PPHumanSeg | 0.9656 | 0.9164 |
+| PPHumanSeg block | 0.9655 | 0.9162 |
+| PPHumanSeg quant | 0.7285 | 0.3642 |
 
 
 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
 
 ---
 ## License
```

models/image_classification_mobilenet/README.md CHANGED

```diff
@@ -4,16 +4,22 @@ MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applicatio
 
 MobileNetV2: Inverted Residuals and Linear Bottlenecks
 
+**Note**:
+- `image_classification_mobilenetvX_2022apr_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 | Models | Top-1 Accuracy | Top-5 Accuracy |
 | ------------------ | -------------- | -------------- |
 | MobileNet V1 | 67.64 | 87.97 |
+| MobileNet V1 block | 67.21 | 87.62 |
 | MobileNet V1 quant | 55.53 | 78.74 |
 | MobileNet V2 | 69.44 | 89.23 |
+| MobileNet V2 block | 68.66 | 88.90 |
 | MobileNet V2 quant | 68.37 | 88.56 |
 
 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
 
 ## Demo
 
```

models/image_classification_ppresnet/README.md CHANGED

```diff
@@ -4,14 +4,19 @@ Deep Residual Learning for Image Recognition
 
 This model is ported from [PaddleHub](https://github.com/PaddlePaddle/PaddleHub) using [this script from OpenCV](https://github.com/opencv/opencv/blob/master/samples/dnn/dnn_model_runner/dnn_conversion/paddlepaddle/paddle_resnet50.py).
 
+**Note**:
+- `image_classification_ppresnet50_2022jan_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 | Models | Top-1 Accuracy | Top-5 Accuracy |
 | --------------- | -------------- | -------------- |
 | PP-ResNet | 82.28 | 96.15 |
+| PP-ResNet block | 82.27 | 96.15 |
 | PP-ResNet quant | 0.22 | 0.96 |
 
 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
 
 ## Demo
 
```

models/license_plate_detection_yunet/README.md CHANGED

```diff
@@ -4,6 +4,9 @@ This model is contributed by Dong Xu (徐栋) from [watrix.ai](watrix.ai) (银
 
 Please note that the model is trained with Chinese license plates, so the detection results of other license plates with this model may be limited.
 
+**Note**:
+- `license_plate_detection_lpd_yunet_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo
 
 Run the following command to try the demo:
```

models/object_detection_nanodet/README.md CHANGED

```diff
@@ -2,8 +2,10 @@
 
 Nanodet: NanoDet is a FCOS-style one-stage anchor-free object detection model which using Generalized Focal Loss as classification and regression loss.In NanoDet-Plus, we propose a novel label assignment strategy with a simple assign guidance module (AGM) and a dynamic soft label assigner (DSLA) to solve the optimal label assignment problem in lightweight model training.
 
-Note
+**Note**:
 - This version of nanodet: Nanodet-m-plus-1.5x_416
+- `object_detection_nanodet_2022nov_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 
 ## Demo
 
```

models/object_detection_yolox/README.md CHANGED

```diff
@@ -8,8 +8,10 @@ Key features of the YOLOX object detector
 - **SimOTA advanced label assignment strategy** reduces training time and avoids additional solver hyperparameters
 - **Strong data augmentations like MixUp and Mosiac** to boost YOLOX performance
 
-Note
+**Note**:
 - This version of YoloX: YoloX_s
+- `object_detection_yolox_2022nov_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 
 ## Demo
 
```

models/object_tracking_vittrack/README.md CHANGED

```diff
@@ -8,7 +8,10 @@ Video demo: https://youtu.be/MJiPnu1ZQRI
 
 This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC 2023 project [**Realtime object tracking models**](https://github.com/opencv/opencv/wiki/GSoC_2023#idea-realtime-object-tracking-models)
 
-**…
+**Note**:
+- OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
+- `object_tracking_vittrack_2023sep_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 
 # Demo
 ## Python
```

models/optical_flow_estimation_raft/README.md CHANGED

```diff
@@ -1,6 +1,8 @@
 # RAFT
 This model is originally created by Zachary Teed and Jia Deng of Princeton University. The source code for the model is at [their repository on GitHub](https://github.com/princeton-vl/RAFT), and the original [research paper](https://arxiv.org/abs/2003.12039) is published on [Arxiv](https://arxiv.org/abs/2003.12039). The model was converted to ONNX by [PINTO0309](https://github.com/PINTO0309) in his [model zoo](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/252_RAFT). The ONNX model has several variations depending on the training dataset and input dimesnions. The model used in this demo is trained on Sintel dataset with input size of 360 $\times$ 480.
 
+**Note**:
+- `optical_flow_estimation_raft_2023aug_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 ## Demo
 
```

models/palm_detection_mediapipe/README.md CHANGED

```diff
@@ -9,6 +9,7 @@ SSD Anchors are generated from [GenMediaPipePalmDectionSSDAnchors](https://githu
 
 **Note**:
 - Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#hands for models of larger scale.
+- `palm_detection_mediapipe_2023feb_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 ## Demo
 
```

models/person_detection_mediapipe/README.md CHANGED

```diff
@@ -7,6 +7,9 @@ This model detects upper body and full body keypoints of a person, and is downlo
 
 SSD Anchors are generated from [GenMediaPipePalmDectionSSDAnchors](https://github.com/VimalMollyn/GenMediaPipePalmDectionSSDAnchors)
 
+**Note**:
+- `person_detection_mediapipe_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo
 
 ### Python
```

models/person_reid_youtureid/README.md CHANGED

```diff
@@ -2,9 +2,9 @@
 
 This model is provided by Tencent Youtu Lab [[Credits]](https://github.com/opencv/opencv/blob/394e640909d5d8edf9c1f578f8216d513373698c/samples/dnn/person_reid.py#L6-L11).
 
-Note
-
+**Note**:
 - Model source: https://github.com/ReID-Team/ReID_extra_testdata
+- `person_reid_youtu_2021nov_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 ## Demo
 
```

models/pose_estimation_mediapipe/README.md CHANGED

```diff
@@ -10,6 +10,8 @@ This model is converted from TFlite to ONNX using following tools:
 
 **Note**:
 - Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#pose for models of larger scale.
+- `pose_estimation_mediapipe_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo
 
 ### python
```

models/text_detection_ppocr/README.md CHANGED

```diff
@@ -2,7 +2,7 @@
 
 PP-OCRv3: More Attempts for the Improvement of Ultra Lightweight OCR System.
 
-Note
+**Note**:
 
 - The int8 quantization model may produce unstable results due to some loss of accuracy.
 - Original Paddle Models source of English: [here](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar).
@@ -10,6 +10,7 @@ Note:
 - `IC15` in the filename means the model is trained on [IC15 dataset](https://rrc.cvc.uab.es/?ch=4&com=introduction), which can detect English text instances only.
 - `TD500` in the filename means the model is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances.
 - Visit https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html for more information.
+- `text_detection_xx_ppocrv3_2023may_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 ## Demo
 
```

models/text_recognition_crnn/README.md CHANGED

```diff
@@ -15,7 +15,7 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval) at different
 
 \*: 'FP16' or 'INT8' stands for 'model quantized into FP16' or 'model quantized into int8'
 
-Note
+**Note**:
 
 - Model source:
   - `text_recognition_CRNN_EN_2021sep.onnx`: https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html (CRNN_VGG_BiLSTM_CTC.onnx)
@@ -25,6 +25,7 @@ Note:
 - `text_recognition_CRNN_CH_2021sep.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), and some special characters (see `CHARSET_CH_94` for details in `crnn.py`).
 - `text_recognition_CRNN_CN_2021nov.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), some Chinese characters and some special characters (see `CHARSET_CN_3944` for details in `crnn.py`).
 - For details on training this model series, please visit https://github.com/zihaomu/deep-text-recognition-benchmark.
+- `text_recognition_CRNN_XX_2021xxx_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
 
 ## Demo
 
```

tools/eval/README.md CHANGED

````diff
@@ -146,7 +146,7 @@ python eval.py -m sface -d lfw -dr /path/to/lfw
 
 ### Prepare data
 
-Please visit http://iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions to download the ICDAR2003 dataset and the labels.
+Please visit http://iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions to download the ICDAR2003 dataset and the labels. You have to download the Robust Word Recognition [TrialTrain Set](http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTrain/word.zip) only.
 
 ```shell
 $ tree -L 2 /path/to/icdar
@@ -199,6 +199,20 @@ python eval.py -m crnn -d iiit5k -dr /path/to/iiit5k
 ### Prepare data
 Please download the mini_supervisely data from [here](https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip) which includes the validation dataset and unzip it.
 
+```shell
+$ tree -L 2 /path/to/mini_supervisely
+.
+├── Annotations
+│   ├── ache-adult-depression-expression-41253.png
+│   ├── ...
+├── Images
+│   ├── ache-adult-depression-expression-41253.jpg
+│   ├── ...
+├── test.txt
+├── train.txt
+└── val.txt
+```
+
 ### Evaluation
 
 Run evaluation with the following command :
````

tools/eval/eval.py CHANGED

```diff
@@ -33,6 +33,12 @@ models = dict(
         modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv1_2022apr_int8.onnx"),
         topK=5,
         loadLabel=False),
+    mobilenetv1_bq=dict(
+        name="MobileNet",
+        topic="image_classification",
+        modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv1_2022apr_int8bq.onnx"),
+        topK=5,
+        loadLabel=False),
     mobilenetv2=dict(
         name="MobileNet",
         topic="image_classification",
@@ -45,6 +51,12 @@ models = dict(
         modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr_int8.onnx"),
         topK=5,
         loadLabel=False),
+    mobilenetv2_bq=dict(
+        name="MobileNet",
+        topic="image_classification",
+        modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr_int8bq.onnx"),
+        topK=5,
+        loadLabel=False),
     ppresnet=dict(
         name="PPResNet",
         topic="image_classification",
@@ -57,6 +69,12 @@ models = dict(
         modelPath=os.path.join(root_dir, "models/image_classification_ppresnet/image_classification_ppresnet50_2022jan_int8.onnx"),
         topK=5,
         loadLabel=False),
+    ppresnet_bq=dict(
+        name="PPResNet",
+        topic="image_classification",
+        modelPath=os.path.join(root_dir, "models/image_classification_ppresnet/image_classification_ppresnet50_2022jan_int8bq.onnx"),
+        topK=5,
+        loadLabel=False),
     yunet=dict(
         name="YuNet",
         topic="face_detection",
@@ -71,6 +89,13 @@ models = dict(
         topK=5000,
         confThreshold=0.3,
         nmsThreshold=0.45),
+    yunet_bq=dict(
+        name="YuNet",
+        topic="face_detection",
+        modelPath=os.path.join(root_dir, "models/face_detection_yunet/face_detection_yunet_2023mar_int8bq.onnx"),
+        topK=5000,
+        confThreshold=0.3,
+        nmsThreshold=0.45),
     sface=dict(
         name="SFace",
         topic="face_recognition",
@@ -79,6 +104,10 @@ models = dict(
         name="SFace",
         topic="face_recognition",
         modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec_int8.onnx")),
+    sface_bq=dict(
+        name="SFace",
+        topic="face_recognition",
+        modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec_int8bq.onnx")),
     crnn_en=dict(
         name="CRNN",
         topic="text_recognition",
@@ -95,6 +124,10 @@ models = dict(
         name="PPHumanSeg",
         topic="human_segmentation",
         modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8.onnx")),
+    pphumanseg_bq=dict(
+        name="PPHumanSeg",
+        topic="human_segmentation",
+        modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8bq.onnx")),
 )
 
 datasets = dict(
```

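With these registry entries in place, a block-quantized model should be evaluated exactly like its fp32 or int8 counterpart by passing the new key to `-m`, for example `python eval.py -m sface_bq -d lfw -dr /path/to/lfw` (mirroring the `sface` command documented in tools/eval/README.md).
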
tools/quantize/README.md CHANGED

```diff
@@ -54,6 +54,8 @@ python quantize-inc.py model1
 
 ## Blockwise quantization usage
 
+Block-quantized models under each model directory are generated with `--block_size=64`
+
 `block_quantize.py` requires Python>=3.7
 
 To perform weight-only blockwise quantization:
```

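For reference, the block-quantized zoo models were produced by running `block_quantize.py` on each fp32 ONNX file with `--block_size 64`. An invocation along the lines of `python block_quantize.py --input_model model.onnx --output_model model_int8bq.onnx --block_size 64` should work, though only `--block_size`, `--bits`, and the new `-v/--verbose` flag are visible in the diff below; the input/output flag names here are assumptions.
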
tools/quantize/block_quantize.py CHANGED

```diff
@@ -8,7 +8,8 @@ if sys.version_info < MIN_PYTHON_VERSION:
 import argparse
 import os
 from dataclasses import dataclass, field
-from typing import …
+from typing import Dict, Tuple
+from enum import Enum, auto
 
 import numpy as np
 import onnx
@@ -22,12 +23,19 @@ SUPPORTED_OPS = {"Conv", "Gemm", "MatMul"}
 ONNX_OPSET = 21
 
 
+class WeightCategory(Enum):
+    INITIALIZER = auto()
+    CONSTANT = auto()
+    NONE = auto()
+
+
 @dataclass
 class BlockQuantizeConfig:
     input_model_path: str
     output_model_path: str
     block_size: int
     bits: int
+    verbose: bool
 
 
 @dataclass
@@ -75,9 +83,13 @@ def block_quantize_tensor(
     y_scale_elementwise = np.repeat(scale, repeats=repeats, axis=block_axis)
     y_zero_point_elementwise = np.repeat(zero_point, repeats=repeats, axis=block_axis)
 
-…
-…
-…
+    type_info = np.iinfo(BITS_TO_NUMPY_TYPE[n_bits])
+    min_value = type_info.min
+    max_value = type_info.max
+
+    y = np.rint(x / y_scale_elementwise + y_zero_point_elementwise)
+    y = np.clip(y, min_value, max_value)
+    y = y.astype(BITS_TO_NUMPY_TYPE[n_bits])
 
     return y
 
@@ -129,6 +141,13 @@ class BlockQuantizer:
         self.initializers_map = {
             init.name: init for init in self.model.graph.initializer
         }
+        self.costants_map = {
+            node.output[0]: next(
+                attr.t for attr in node.attribute if attr.name == "value"
+            )
+            for node in self.model.graph.node
+            if node.op_type == "Constant"
+        }
 
     def validate_conf(self):
         if not os.path.isfile(self.conf.input_model_path):
@@ -155,34 +174,59 @@ class BlockQuantizer:
             f"Bits must be one of the following values: [{allowed_values}]."
         )
 
-    def …
+    def get_weight_category(self, name: str) -> WeightCategory:
         if name in self.initializers_map:
+            return WeightCategory.INITIALIZER
+        if name in self.costants_map:
+            return WeightCategory.CONSTANT
+        else:
+            return WeightCategory.NONE
+
+    def get_weight_tensor(self, name: str, category: WeightCategory) -> np.ndarray:
+        if category == WeightCategory.INITIALIZER:
             return onnx.numpy_helper.to_array(self.initializers_map[name])
+        elif category == WeightCategory.CONSTANT:
+            return onnx.numpy_helper.to_array(self.costants_map[name])
+        else:
+            raise AssertionError("Invalid weight category")
 
-…
+    def remove_fp32_weights(self, name: str, category: WeightCategory):
+        if category == WeightCategory.INITIALIZER:
+            self.graph.initializer.remove(
+                next(init for init in self.graph.initializer if init.name == name)
+            )
+        elif category == WeightCategory.CONSTANT:
+            self.graph.node.remove(
+                next(
+                    node
+                    for node in self.graph.node
+                    if node.op_type == "Constant" and node.output[0] == name
+                )
+            )
+        else:
+            raise AssertionError("Invalid weight category")
 
     def compute_scale_zeropoint(
         self, b_min: np.ndarray, b_max: np.ndarray
     ) -> Tuple[np.ndarray, np.ndarray]:
         assert (
-            b_min …
-        ).all(), …
-            "minimum must be lower than maximum when computing scale and zero point"
-        )
+            b_min <= b_max
+        ).all(), "minimum must not be greater than maximum when computing scale and zero point"
 
         # zero must be present in the range, this enforces qmin <= zero_point <= qmax
         b_min = np.minimum(b_min, np.zeros_like(b_min, dtype=b_min.dtype))
         b_max = np.maximum(b_max, np.zeros_like(b_max, dtype=b_max.dtype))
 
-…
-…
+        type_info = np.iinfo(BITS_TO_NUMPY_TYPE[self.conf.bits])
+        qmin = type_info.min
+        qmax = type_info.max
 
         dq = qmax - qmin
 
-        scales = (b_max - b_min) / dq
-…
-…
-        )
+        scales = np.where(b_max != b_min, (b_max - b_min) / dq, 1.0)
+
+        zeropoints = np.where(b_max != b_min, np.rint(qmin - b_min / scales), 0.0)
+        zeropoints = zeropoints.astype(BITS_TO_NUMPY_TYPE[self.conf.bits])
 
         return (scales, zeropoints)
 
@@ -221,7 +265,8 @@ class BlockQuantizer:
             quantized_weight, quantization_axis, scales, zeropoints
         )
 
-…
+        # Relative Norm
+        qerror = np.linalg.norm(reconstructed_mat - weight) / (np.linalg.norm(weight) + 1e-10)
 
         res = BlockQuantizeResult(
             quantized_weight,
@@ -241,16 +286,32 @@ class BlockQuantizer:
 
         return size_mb
 
-    def display_summary(self, sqe: …
-…
+    def display_summary(self, sqe: Dict[str, int]):
+        sqe_v = list(sqe.values())
+        if len(sqe_v) == 0:
+            mse = 0
+            print(
+                "Warning: No weights have been quantized, likely due to unsupported layers."
+            )
+        else:
+            mse = sum(sqe_v) / len(sqe_v)
         original_model_size = self.get_model_size(self.conf.input_model_path)
         quantized_model_size = self.get_model_size(self.conf.output_model_path)
 
+        if self.conf.verbose:
+            sorted_sqe = sorted(sqe.items(), key=lambda item: item[1], reverse=True)
+            longest_key_len = max(len(key) for key in sqe.keys())
+
+            print("Quantization error (Relative Norm) sorted in ascending order:")
+
+            for key, value in sorted_sqe:
+                print(f"{key:<{longest_key_len}} : {value}")
+
         print("Done! Results saved in", self.conf.output_model_path)
         print("\nSummary of Results:\n")
         print(f"{'Metric':<30} {'Value':<10}")
         print(f"{'-'*40}")
-        print(f"{'…
+        print(f"{'Relative Norm Error':<31} {mse:.6f}")
         print(f"{'Original Model Size (KB)':<31} {original_model_size:,.2f}")
         print(f"{'Block-Quantized Model Size (KB)':<30} {quantized_model_size:,.2f}")
 
@@ -258,7 +319,7 @@ class BlockQuantizer:
         print("Quantizing the model...")
 
         quantized_inputs = []
-        sqe = …
+        sqe = {}
 
         node_idx = 0
 
@@ -267,7 +328,13 @@ class BlockQuantizer:
 
             if node.op_type in SUPPORTED_OPS:
                 for input_idx, input_name in enumerate(node.input):
-…
+                    weightCategory = self.get_weight_category(input_name)
+
+                    # Skip quantization if weights are taken as external input
+                    if weightCategory == WeightCategory.NONE:
+                        continue
+
+                    weight = self.get_weight_tensor(input_name, weightCategory)
 
                     quantized_weights_name = f"{input_name}_quantized"
                     quantized_node_name = f"{input_name}_quantized_node"
@@ -279,9 +346,8 @@ class BlockQuantizer:
                     shape_name = f"{input_name}_shape"
                     reshaped_weights_name = f"{input_name}_reshaped"
 
-                    # Skip quantization if weights …
-…
-                    if weight is None or weight.size < self.conf.block_size:
+                    # Skip quantization if weights don't contain enough elements to create at least 1 block
+                    if weight.size < self.conf.block_size:
                         continue
 
                     reshape_needed = weight.ndim > 2
@@ -295,9 +361,15 @@ class BlockQuantizer:
                         )
                         continue
 
-…
+
                     block_quantize_res = self.block_quantize(weight)
 
+                    # Skip quantization if it wouldn't reduce the model size
+                    if block_quantize_res.block_size == 1:
+                        continue
+
+                    quantized_inputs.append(input_name)
+
                     dequantize_node = create_dequantize_node(
                         quantized_node_name,
                         quantized_weights_name,
@@ -352,14 +424,7 @@ class BlockQuantizer:
                         ]
                     )
 
-…
-                    self.graph.initializer.remove(
-                        next(
-                            init
-                            for init in self.graph.initializer
-                            if init.name == input_name
-                        )
-                    )
+                    self.remove_fp32_weights(input_name, weightCategory)
 
                     node.input[input_idx] = (
                         reshaped_weights_name
@@ -374,11 +439,12 @@ class BlockQuantizer:
 
                     self.graph.node.insert(0, dequantize_node)
                     node_idx += 1
-…
+                    if reshape_needed:
+                        self.graph.value_info.insert(0, shape_info)
                     self.graph.value_info.insert(0, dequantized_weights_info)
 
-                    sqe …
-…
+                    sqe[input_name] = block_quantize_res.quantization_error
+
             node_idx += 1
 
         onnx.checker.check_model(self.model, full_check=True)
@@ -421,6 +487,13 @@ def setup_args() -> argparse.Namespace:
         default="block_quantized_model.onnx",
         required=False,
     )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose output",
+        required=False,
+    )
 
     return parser.parse_args()
 
@@ -433,6 +506,7 @@ if __name__ == "__main__":
         output_model_path=args.output_model,
         block_size=args.block_size,
         bits=args.bits,
+        verbose=args.verbose
     )
 
     quantizer = BlockQuantizer(quantization_config)
```

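To make the math in the diff above concrete, here is a self-contained sketch of the per-block scale/zero-point computation, the rounding-plus-saturation step, and the relative-norm error reported in the summary. It is illustrative only: the names and the uint8 choice are assumptions, and the real script applies this per ONNX weight and emits `DequantizeLinear` nodes rather than dequantizing in numpy.

```python
import numpy as np

def quantize_block(x: np.ndarray, qtype=np.uint8):
    """Quantize one block of weights, mirroring compute_scale_zeropoint/block_quantize_tensor."""
    info = np.iinfo(qtype)
    qmin, qmax = info.min, info.max

    # zero must lie inside [b_min, b_max] so that qmin <= zero_point <= qmax
    b_min = min(float(x.min()), 0.0)
    b_max = max(float(x.max()), 0.0)

    # constant blocks (b_max == b_min) get scale=1 and zero_point=0, as in the tool
    scale = (b_max - b_min) / (qmax - qmin) if b_max != b_min else 1.0
    zero_point = np.rint(qmin - b_min / scale) if b_max != b_min else 0.0
    zero_point = np.array(zero_point, dtype=qtype)

    # round, then saturate to the quantized dtype's range (the "data type saturation" fix)
    q = np.clip(np.rint(x / scale + zero_point), qmin, qmax).astype(qtype)
    return q, scale, zero_point

def dequantize_block(q, scale, zero_point):
    return (q.astype(np.float32) - np.float32(zero_point)) * np.float32(scale)

rng = np.random.default_rng(0)
w = rng.standard_normal(256).astype(np.float32)

# split a 256-element weight column into blocks of 64 (the zoo models use block_size=64)
blocks = w.reshape(-1, 64)
recon = np.concatenate([dequantize_block(*quantize_block(b)) for b in blocks])

# relative-norm error, the metric the script records per quantized input and averages in its summary
rel_err = np.linalg.norm(recon - w) / (np.linalg.norm(w) + 1e-10)
print(f"relative norm error: {rel_err:.6f}")
```
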