Zhang-Yang-Sustech committed on
Commit
0bf46d3
·
1 Parent(s): 92f2071

Migrating the EfficientSAM model to the OpenCV model zoo (#258)

Browse files

* a

* add efficientsam model and basic demo

* update license

* remove example images

* update readme

* update readme

* update demo

* update demo

* update readme

* update SAM and __init__

* update demo and sam

* update label

* add present gif

* update readme

* add efficientSAM gif to readme of opencvzoo

* cv version 4.10.0, remove camera branch

README.md CHANGED
@@ -73,6 +73,10 @@ Some examples are listed below. You can find more in the directory of each model
73
 
74
  ![messi](./models/human_segmentation_pphumanseg/example_outputs/messi.jpg)
75
 
 
 
 
 
76
  ### License Plate Detection with [LPD_YuNet](./models/license_plate_detection_yunet/)
77
 
78
  ![license plate detection](./models/license_plate_detection_yunet/example_outputs/lpd_yunet_demo.gif)
 
73
 
74
  ![messi](./models/human_segmentation_pphumanseg/example_outputs/messi.jpg)
75
 
76
+ ### Image Segmentation with [EfficientSAM](./models/image_segmentation_efficientsam/)
77
+
78
+ ![sam_present](./models/image_segmentation_efficientsam/example_outputs/sam_present.gif)
79
+
80
  ### License Plate Detection with [LPD_YuNet](./models/license_plate_detection_yunet/)
81
 
82
  ![license plate detection](./models/license_plate_detection_yunet/example_outputs/lpd_yunet_demo.gif)
models/__init__.py CHANGED
@@ -20,6 +20,7 @@ from .object_detection_yolox.yolox import YoloX
20
  from .facial_expression_recognition.facial_fer_model import FacialExpressionRecog
21
  from .object_tracking_vittrack.vittrack import VitTrack
22
  from .text_detection_ppocr.ppocr_det import PPOCRDet
 
23
 
24
  class ModuleRegistery:
25
  def __init__(self, name):
@@ -94,3 +95,4 @@ MODELS.register(YoloX)
94
  MODELS.register(FacialExpressionRecog)
95
  MODELS.register(VitTrack)
96
  MODELS.register(PPOCRDet)
 
 
20
  from .facial_expression_recognition.facial_fer_model import FacialExpressionRecog
21
  from .object_tracking_vittrack.vittrack import VitTrack
22
  from .text_detection_ppocr.ppocr_det import PPOCRDet
23
+ from .image_segmentation_efficientsam.efficientSAM import EfficientSAM
24
 
25
  class ModuleRegistery:
26
  def __init__(self, name):
 
95
  MODELS.register(FacialExpressionRecog)
96
  MODELS.register(VitTrack)
97
  MODELS.register(PPOCRDet)
98
+ MODELS.register(EfficientSAM)
models/image_segmentation_efficientsam/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
models/image_segmentation_efficientsam/README.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # image_segmentation_efficientsam
2
+
3
+ EfficientSAM: Leveraged Masked Image Pretraining for Efficient Segment Anything
4
+
5
+ Notes:
6
+ - The current implementation of the EfficientSAM demo uses the EfficientSAM-Ti model, which is tailored for scenarios that require high speed and a lightweight model.
7
+ - MD5 value of "efficient_sam_vitt.pt" is 7A804DA508F30EFC59EC06711C8DCD62
8
+ - SHA-256 value of "efficient_sam_vitt.pt" is DFF858B19600A46461CBB7DE98F796B23A7A888D9F5E34C0B033F7D6EB9E4E6A
9
+
10
+
11
+ ## Demo
12
+
13
+ ### Python
14
+ Run the following command to try the demo:
15
+
16
+ ```shell
17
+ python demo.py --input /path/to/image
18
+ ```
19
+
20
+ Click only **once** on the object you wish to segment in the displayed image. After the click, the segmentation result will be shown in a new window.
21
+
22
+ ## Result
23
+
24
+ Here are some of the sample results that were observed using the model:
25
+
26
+ ![test1_res.jpg](./example_outputs/example1.png)
27
+ ![test2_res.jpg](./example_outputs/example2.png)
28
+
29
+ Video inference result:
30
+
31
+ ![sam_present.gif](./example_outputs/sam_present.gif)
32
+
33
+ ## Model metrics:
34
+
35
+ ## License
36
+
37
+ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
38
+
39
+ #### Contributor Details
40
+
41
+ ## Reference
42
+
43
+ - https://arxiv.org/abs/2312.00863
44
+ - https://github.com/yformer/EfficientSAM
models/image_segmentation_efficientsam/demo.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import numpy as np
3
+ import cv2 as cv
4
+ from efficientSAM import EfficientSAM
5
+
6
# Check OpenCV version
def _version_tuple(version):
    """Return the leading numeric (major, minor, patch) parts of a version string.

    Version strings must not be compared directly: string comparison is
    lexicographic, so "4.9.0" >= "4.10.0" evaluates to True. Each component is
    therefore converted to an integer first. Non-digit suffixes (for example
    "0-dev") are ignored and missing/empty components count as 0.
    """
    numbers = []
    for piece in version.split(".")[:3]:
        digits = ""
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


assert _version_tuple(cv.__version__) >= _version_tuple("4.10.0"), \
       "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"

# Valid combinations of backends and targets
backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA],
    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA_FP16],
    [cv.dnn.DNN_BACKEND_TIMVX,  cv.dnn.DNN_TARGET_NPU],
    [cv.dnn.DNN_BACKEND_CANN,   cv.dnn.DNN_TARGET_NPU]
]

parser = argparse.ArgumentParser(description='EfficientSAM Demo')
parser.add_argument('--input', '-i', type=str,
                    help='Set input path to a certain image.')
parser.add_argument('--model', '-m', type=str, default='image_segmentation_efficientsam_ti_2024may.onnx',
                    help='Set model path, defaults to image_segmentation_efficientsam_ti_2024may.onnx.')
# `choices` rejects indices outside the table up front instead of failing later
# with an IndexError (or silently wrapping around for negative values).
parser.add_argument('--backend_target', '-bt', type=int, default=0,
                    choices=range(len(backend_target_pairs)),
                    help='''Choose one of the backend-target pair to run this demo:
                        {:d}: (default) OpenCV implementation + CPU,
                        {:d}: CUDA + GPU (CUDA),
                        {:d}: CUDA + GPU (CUDA FP16),
                        {:d}: TIM-VX + NPU,
                        {:d}: CANN + NPU
                    '''.format(*range(len(backend_target_pairs))))
parser.add_argument('--save', '-s', action='store_true',
                    help='Specify to save a file with results. Invalid in case of camera input.')
args = parser.parse_args()

# Global click flag: set by the mouse callback, cleared by the main loop
clicked_left = False
# Global record of the clicked point(s), in window/image coordinates
point = []
40
+
41
def visualize(image, result):
    """
    Overlay a segmentation mask on a copy of the input image.

    The mask is binarized at 127, the red channel (index 2) of the masked
    pixels is brightened, and the mask contours are drawn in white.

    Args:
        image (np.ndarray): The input image; it is not modified.
        result (np.ndarray): The mask returned by the model.

    Returns:
        np.ndarray: A copy of ``image`` with the mask highlighted.
    """
    canvas = np.copy(image)

    # Binarize a copy of the mask: values above 127 become 255, the rest 0
    _, binary = cv.threshold(np.copy(result), 127, 255, cv.THRESH_BINARY)
    assert set(np.unique(binary)) <= {0, 255}, "The mask must be a binary image"

    # Brighten the red channel inside the mask, clamped to 255
    gain = 1.8
    red = canvas[:, :, 2]
    boosted = np.minimum(red * gain, 255)
    canvas[:, :, 2] = np.where(binary == 255, boosted, red)

    # Outline the segmented region(s)
    outlines, _ = cv.findContours(binary, cv.RETR_LIST, cv.CHAIN_APPROX_TC89_L1)
    cv.drawContours(canvas, outlines, contourIdx=-1, color=(255, 255, 255), thickness=2)
    return canvas
69
+
70
def select(event, x, y, flags, param):
    """
    Mouse callback that records a click position.

    When the left mouse button is released, the (x, y) position is appended to
    the module-level ``point`` list and ``clicked_left`` is set so the main
    loop knows there is a new prompt. All other events are ignored.
    """
    global clicked_left
    if event != cv.EVENT_LBUTTONUP:
        return
    point.append([x, y])
    print("point:", point[0])
    clicked_left = True
77
+
78
if __name__ == '__main__':
    backend_id = backend_target_pairs[args.backend_target][0]
    target_id = backend_target_pairs[args.backend_target][1]
    # Load the EfficientSAM model on the backend/target chosen with --backend_target
    # (previously the pair was looked up but never handed to the model).
    model = EfficientSAM(modelPath=args.model, backendId=backend_id, targetId=target_id)

    if args.input is not None:
        # Read image
        image = cv.imread(args.input)
        if image is None:
            print('Could not open or find the image:', args.input)
            exit(0)

        # Remain None until the first click, so --save never touches undefined names
        result = None
        vis_result = None

        # create window
        image_window = "image: click on the thing whick you want to segment!"
        cv.namedWindow(image_window, cv.WINDOW_NORMAL)
        # change window size: resizeWindow expects (width, height) while
        # image.shape is (height, width, channels); cap the window at 800x600
        cv.resizeWindow(image_window, min(image.shape[1], 800), min(image.shape[0], 600))
        # put the window on the left of the screen
        cv.moveWindow(image_window, 50, 100)
        # set listener to record user's click point
        cv.setMouseCallback(image_window, select)
        # tips in the terminal
        print("click the picture on the LEFT and see the result on the RIGHT!")
        # show image
        cv.imshow(image_window, image)

        # Keep polling until a key is pressed; a pending click is always processed first
        while cv.waitKey(1) == -1 or clicked_left:
            if clicked_left:
                # put the click point(s) (x, y) into the model to predict;
                # every recorded click is a foreground prompt (label 1), so the
                # label list always has the same length as the point list
                result = model.infer(image=image, points=point, labels=[1] * len(point))
                # get the visualized result
                vis_result = visualize(image, result)
                # create window to show visualized result
                cv.namedWindow("vis_result", cv.WINDOW_NORMAL)
                cv.resizeWindow("vis_result", min(vis_result.shape[1], 800), min(vis_result.shape[0], 600))
                cv.moveWindow("vis_result", 851, 100)
                cv.imshow("vis_result", vis_result)
                # reset the click state right away so the next click starts a fresh prompt
                clicked_left = False
                point = []
            elif cv.getWindowProperty(image_window, cv.WND_PROP_VISIBLE) < 1:
                # if click × to close the image window then ending
                break
        cv.destroyAllWindows()

        # Save results if save is true
        if args.save:
            if result is None or vis_result is None:
                print('Nothing to save: no point was clicked before the demo ended.')
            else:
                # imwrite reports failure through its return value
                saved_vis = cv.imwrite('./example_outputs/vis_result.jpg', vis_result)
                saved_mask = cv.imwrite("./example_outputs/mask.jpg", result)
                if saved_vis and saved_mask:
                    print('vis_result.jpg and mask.jpg are saved to ./example_outputs/')
                else:
                    print('Failed to save results to ./example_outputs/ (does the directory exist?)')
    else:
        print('Set input path to a certain image.')
136
+
models/image_segmentation_efficientsam/efficientSAM.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2 as cv
3
+
4
class EfficientSAM:
    """Point-prompted segmentation with the EfficientSAM model via ``cv.dnn``.

    The network is fed three inputs (image, point coordinates, point labels)
    and the result is returned as a uint8 mask at the original image size.
    """

    def __init__(self, modelPath, backendId=0, targetId=0):
        """Load the network and select the DNN backend and target.

        Args:
            modelPath (str): Path of the model file given to ``cv.dnn.readNet``.
            backendId (int): Value passed to ``setPreferableBackend``.
            targetId (int): Value passed to ``setPreferableTarget``.
        """
        self._modelPath = modelPath
        self._backendId = backendId
        self._targetId = targetId

        self._model = cv.dnn.readNet(self._modelPath)
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)
        # 3 inputs
        self._inputNames = ["batched_images", "batched_point_coords", "batched_point_labels"]

        self._outputNames = ['output_masks']  # actual output layer name
        # (width, height) of the most recently preprocessed image
        self._currentInputSize = None
        self._inputSize = [640, 640]  # input size for the model

    @property
    def name(self):
        """Return the class name."""
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        """Switch the loaded network to another DNN backend/target pair."""
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

    def _scalePoints(self, points):
        """Map points from original-image pixels to model-input pixels.

        Reads ``self._currentInputSize`` (width, height of the original image)
        and ``self._inputSize``. A new list is returned and ``points`` itself
        is never modified, so callers can reuse their list (and may pass
        tuples as well as lists).

        Args:
            points: Iterable of (x, y) pairs in original-image coordinates.

        Returns:
            list[list[int]]: Scaled [x, y] pairs, truncated to integers.
        """
        return [
            [
                int(p[0] * self._inputSize[0] / self._currentInputSize[0]),
                int(p[1] * self._inputSize[1] / self._currentInputSize[1]),
            ]
            for p in points
        ]

    def _preprocess(self, image, points, labels):
        """Build the three input blobs for the network.

        Args:
            image (np.ndarray): Image in BGR channel order (it is converted to RGB here).
            points: (x, y) prompt points in the coordinates of ``image``.
            labels: One label per point.

        Returns:
            tuple: ``(image_blob, points_blob, labels_blob)``.
        """
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        # record the input image size, (width, height)
        self._currentInputSize = (image.shape[1], image.shape[0])

        image = cv.resize(image, self._inputSize)
        image = image.astype(np.float32, copy=False) / 255.0
        image_blob = cv.dnn.blobFromImage(image)

        # convert points to the model input space without touching the caller's list
        scaled_points = self._scalePoints(points)

        # two leading singleton axes are added in front of the point list
        points_blob = np.array([[scaled_points]], dtype=np.float32)
        # three leading singleton axes are added in front of the label list
        labels_blob = np.array([[[labels]]])

        return image_blob, points_blob, labels_blob

    def infer(self, image, points, labels):
        """Segment the object indicated by the prompt points.

        Args:
            image (np.ndarray): Image in BGR channel order.
            points: (x, y) prompt points in the coordinates of ``image``.
            labels: One label per point.

        Returns:
            np.ndarray: uint8 mask resized to the size of ``image``.
        """
        # Preprocess
        imageBlob, pointsBlob, labelsBlob = self._preprocess(image, points, labels)
        # Forward
        self._model.setInput(imageBlob, self._inputNames[0])
        self._model.setInput(pointsBlob, self._inputNames[1])
        self._model.setInput(labelsBlob, self._inputNames[2])
        outputBlob = self._model.forward()
        # Postprocess
        results = self._postprocess(outputBlob)

        return results

    def _postprocess(self, outputBlob):
        """Turn the raw network output into a uint8 mask at the original image size.

        Takes the 2-D slice at index [0, 0, 0] of the 5-D output, marks entries
        that are >= 0 with 255 (others 0) and resizes the mask to the size
        recorded by ``_preprocess``.
        """
        mask = outputBlob[0, 0, 0, :, :] >= 0

        mask_uint8 = (mask * 255).astype(np.uint8)
        # change to real image size; cubic interpolation may leave intermediate
        # values along the mask border
        mask_uint8 = cv.resize(mask_uint8, dsize=self._currentInputSize, interpolation=cv.INTER_CUBIC)

        return mask_uint8