Abhishek Gola
committed on
Commit
·
6487ac5
1
Parent(s):
2202d11
updated text_detection usage and added .gitattributes
Browse files- .gitattributes +26 -0
- .gitignore +9 -0
- demo.py +4 -5
- ppocr_det.py +59 -0
.gitattributes
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Caffe
|
3 |
+
*.caffemodel filter=lfs diff=lfs merge=lfs -text
|
4 |
+
|
5 |
+
# Tensorflow
|
6 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.pbtxt filter=lfs diff=lfs merge=lfs -text
|
8 |
+
|
9 |
+
# Torch
|
10 |
+
*.t7 filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.net filter=lfs diff=lfs merge=lfs -text
|
12 |
+
|
13 |
+
# Darknet
|
14 |
+
*.weights filter=lfs diff=lfs merge=lfs -text
|
15 |
+
|
16 |
+
# ONNX
|
17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
18 |
+
|
19 |
+
# NPY
|
20 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
21 |
+
|
22 |
+
# Images
|
23 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.pyc
|
2 |
+
**/__pycache__
|
3 |
+
**/__pycache__/**
|
4 |
+
|
5 |
+
.vscode
|
6 |
+
|
7 |
+
build/
|
8 |
+
**/build
|
9 |
+
**/build/**
|
demo.py
CHANGED
@@ -4,11 +4,10 @@
|
|
4 |
# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
|
5 |
# Third party copyrights are property of their respective owners.
|
6 |
|
7 |
-
import sys
|
8 |
import argparse
|
9 |
-
|
10 |
import numpy as np
|
11 |
import cv2 as cv
|
|
|
12 |
|
13 |
# Check OpenCV version
|
14 |
opencv_python_version = lambda str_version: tuple(map(int, (str_version.split("."))))
|
@@ -16,10 +15,10 @@ assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"),
|
|
16 |
"Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
|
17 |
|
18 |
from crnn import CRNN
|
19 |
-
|
20 |
-
sys.path.append('../text_detection_ppocr')
|
21 |
from ppocr_det import PPOCRDet
|
22 |
|
|
|
|
|
23 |
# Valid combinations of backends and targets
|
24 |
backend_target_pairs = [
|
25 |
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
@@ -67,7 +66,7 @@ if __name__ == '__main__':
|
|
67 |
target_id = backend_target_pairs[args.backend_target][1]
|
68 |
|
69 |
# Instantiate PPOCRDet for text detection
|
70 |
-
detector = PPOCRDet(modelPath=
|
71 |
inputSize=[args.width, args.height],
|
72 |
binaryThreshold=0.3,
|
73 |
polygonThreshold=0.5,
|
|
|
4 |
# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
|
5 |
# Third party copyrights are property of their respective owners.
|
6 |
|
|
|
7 |
import argparse
|
|
|
8 |
import numpy as np
|
9 |
import cv2 as cv
|
10 |
+
from huggingface_hub import hf_hub_download
|
11 |
|
12 |
# Check OpenCV version
|
13 |
opencv_python_version = lambda str_version: tuple(map(int, (str_version.split("."))))
|
|
|
15 |
"Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
|
16 |
|
17 |
from crnn import CRNN
|
|
|
|
|
18 |
from ppocr_det import PPOCRDet
|
19 |
|
20 |
+
text_detection_model_path = hf_hub_download(repo_id="opencv/text_detection_ppocr", filename="text_detection_en_ppocrv3_2023may.onnx")
|
21 |
+
|
22 |
# Valid combinations of backends and targets
|
23 |
backend_target_pairs = [
|
24 |
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
|
|
66 |
target_id = backend_target_pairs[args.backend_target][1]
|
67 |
|
68 |
# Instantiate PPOCRDet for text detection
|
69 |
+
detector = PPOCRDet(modelPath=text_detection_model_path,
|
70 |
inputSize=[args.width, args.height],
|
71 |
binaryThreshold=0.3,
|
72 |
polygonThreshold=0.5,
|
ppocr_det.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file is part of OpenCV Zoo project.
|
2 |
+
# It is subject to the license terms in the LICENSE file found in the same directory.
|
3 |
+
#
|
4 |
+
# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
|
5 |
+
# Third party copyrights are property of their respective owners.
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import cv2 as cv
|
9 |
+
|
10 |
+
class PPOCRDet:
    """Text detector wrapping OpenCV's TextDetectionModel_DB (DB algorithm).

    Loads a PP-OCR text-detection model via cv.dnn.readNet and exposes a
    simple detect-on-one-image interface. Preprocessing (mean/scale) follows
    the PP-OCR normalization folded into cv.dnn's input mean/scale.
    """

    def __init__(self, modelPath, inputSize=None, binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
        """
        Args:
            modelPath: path to the detection model file (e.g. ONNX).
            inputSize: network input size as [w, h]; defaults to [736, 736].
            binaryThreshold: binarization threshold for the probability map.
            polygonThreshold: confidence threshold for candidate polygons.
            maxCandidates: maximum number of output text polygons.
            unclipRatio: ratio used to expand detected text regions.
            backendId: cv.dnn backend identifier.
            targetId: cv.dnn target identifier.
        """
        # Avoid the shared-mutable-default pitfall of `inputSize=[736, 736]`;
        # passing None is equivalent to the old default.
        if inputSize is None:
            inputSize = [736, 736]

        self._modelPath = modelPath
        self._model = cv.dnn_TextDetectionModel_DB(
            cv.dnn.readNet(self._modelPath)
        )

        self._inputSize = tuple(inputSize)  # (w, h)
        # BUGFIX: inputSize is (w, h) — the original assigned inputSize[0]
        # (width) to _inputHeight and inputSize[1] (height) to _inputWidth.
        self._inputWidth = inputSize[0]
        self._inputHeight = inputSize[1]
        self._binaryThreshold = binaryThreshold
        self._polygonThreshold = polygonThreshold
        self._maxCandidates = maxCandidates
        self._unclipRatio = unclipRatio
        self._backendId = backendId
        self._targetId = targetId

        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

        self._model.setBinaryThreshold(self._binaryThreshold)
        self._model.setPolygonThreshold(self._polygonThreshold)
        self._model.setUnclipRatio(self._unclipRatio)
        self._model.setMaxCandidates(self._maxCandidates)

        self._model.setInputSize(self._inputSize)
        # PP-OCR normalization: (x / 255 - mean) / std, expressed through
        # cv.dnn's input mean and per-channel scale.
        self._model.setInputMean((123.675, 116.28, 103.53))
        self._model.setInputScale(1.0 / 255.0 / np.array([0.229, 0.224, 0.225]))

    @property
    def name(self):
        """Human-readable model identifier (the class name)."""
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        """Switch the cv.dnn compute backend/target after construction."""
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

    def setInputSize(self, input_size):
        """Update the network input size ((w, h)) and re-apply preprocessing."""
        self._inputSize = tuple(input_size)
        self._model.setInputSize(self._inputSize)
        self._model.setInputMean((123.675, 116.28, 103.53))
        self._model.setInputScale(1.0 / 255.0 / np.array([0.229, 0.224, 0.225]))

    def infer(self, image):
        """Run text detection on one image.

        The image must already match the preset input size exactly
        (height == inputSize[1], width == inputSize[0]).

        Returns:
            The result of TextDetectionModel_DB.detect(): detected text
            polygons with their confidences.
        """
        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])

        return self._model.detect(image)