Abhishek Gola
committed on
Commit
·
6487ac5
1
Parent(s):
2202d11
updated text_detection usage and added .gitattributes
Browse files- .gitattributes +26 -0
- .gitignore +9 -0
- demo.py +4 -5
- ppocr_det.py +59 -0
.gitattributes
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Caffe
|
3 |
+
*.caffemodel filter=lfs diff=lfs merge=lfs -text
|
4 |
+
|
5 |
+
# Tensorflow
|
6 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.pbtxt filter=lfs diff=lfs merge=lfs -text
|
8 |
+
|
9 |
+
# Torch
|
10 |
+
*.t7 filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.net filter=lfs diff=lfs merge=lfs -text
|
12 |
+
|
13 |
+
# Darknet
|
14 |
+
*.weights filter=lfs diff=lfs merge=lfs -text
|
15 |
+
|
16 |
+
# ONNX
|
17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
18 |
+
|
19 |
+
# NPY
|
20 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
21 |
+
|
22 |
+
# Images
|
23 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.pyc
|
2 |
+
**/__pycache__
|
3 |
+
**/__pycache__/**
|
4 |
+
|
5 |
+
.vscode
|
6 |
+
|
7 |
+
build/
|
8 |
+
**/build
|
9 |
+
**/build/**
|
demo.py
CHANGED
@@ -4,11 +4,10 @@
|
|
4 |
# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
|
5 |
# Third party copyrights are property of their respective owners.
|
6 |
|
7 |
-
import sys
|
8 |
import argparse
|
9 |
-
|
10 |
import numpy as np
|
11 |
import cv2 as cv
|
|
|
12 |
|
13 |
# Check OpenCV version
|
14 |
opencv_python_version = lambda str_version: tuple(map(int, (str_version.split("."))))
|
@@ -16,10 +15,10 @@ assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"),
|
|
16 |
"Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
|
17 |
|
18 |
from crnn import CRNN
|
19 |
-
|
20 |
-
sys.path.append('../text_detection_ppocr')
|
21 |
from ppocr_det import PPOCRDet
|
22 |
|
|
|
|
|
23 |
# Valid combinations of backends and targets
|
24 |
backend_target_pairs = [
|
25 |
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
@@ -67,7 +66,7 @@ if __name__ == '__main__':
|
|
67 |
target_id = backend_target_pairs[args.backend_target][1]
|
68 |
|
69 |
# Instantiate PPOCRDet for text detection
|
70 |
-
detector = PPOCRDet(modelPath=
|
71 |
inputSize=[args.width, args.height],
|
72 |
binaryThreshold=0.3,
|
73 |
polygonThreshold=0.5,
|
|
|
4 |
# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
|
5 |
# Third party copyrights are property of their respective owners.
|
6 |
|
|
|
7 |
import argparse
|
|
|
8 |
import numpy as np
|
9 |
import cv2 as cv
|
10 |
+
from huggingface_hub import hf_hub_download
|
11 |
|
12 |
# Check OpenCV version
|
13 |
opencv_python_version = lambda str_version: tuple(map(int, (str_version.split("."))))
|
|
|
15 |
"Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
|
16 |
|
17 |
from crnn import CRNN
|
|
|
|
|
18 |
from ppocr_det import PPOCRDet
|
19 |
|
20 |
+
text_detection_model_path = hf_hub_download(repo_id="opencv/text_detection_ppocr", filename="text_detection_en_ppocrv3_2023may.onnx")
|
21 |
+
|
22 |
# Valid combinations of backends and targets
|
23 |
backend_target_pairs = [
|
24 |
[cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
|
|
|
66 |
target_id = backend_target_pairs[args.backend_target][1]
|
67 |
|
68 |
# Instantiate PPOCRDet for text detection
|
69 |
+
detector = PPOCRDet(modelPath=text_detection_model_path,
|
70 |
inputSize=[args.width, args.height],
|
71 |
binaryThreshold=0.3,
|
72 |
polygonThreshold=0.5,
|
ppocr_det.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file is part of OpenCV Zoo project.
|
2 |
+
# It is subject to the license terms in the LICENSE file found in the same directory.
|
3 |
+
#
|
4 |
+
# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
|
5 |
+
# Third party copyrights are property of their respective owners.
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import cv2 as cv
|
9 |
+
|
10 |
+
class PPOCRDet:
    """Text detector wrapping OpenCV's TextDetectionModel_DB (DB algorithm).

    Loads a PP-OCR text-detection model via cv.dnn.readNet and exposes a
    simple detect-on-one-image interface. Preprocessing (mean/scale) follows
    the PP-OCR normalization folded into cv.dnn's input mean/scale.
    """

    def __init__(self, modelPath, inputSize=None, binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
        """
        Args:
            modelPath: path to the detection model file (e.g. ONNX).
            inputSize: network input size as [w, h]; defaults to [736, 736].
            binaryThreshold: binarization threshold for the probability map.
            polygonThreshold: confidence threshold for candidate polygons.
            maxCandidates: maximum number of output text polygons.
            unclipRatio: ratio used to expand detected text regions.
            backendId: cv.dnn backend identifier.
            targetId: cv.dnn target identifier.
        """
        # Avoid the shared-mutable-default pitfall of `inputSize=[736, 736]`;
        # passing None is equivalent to the old default.
        if inputSize is None:
            inputSize = [736, 736]

        self._modelPath = modelPath
        self._model = cv.dnn_TextDetectionModel_DB(
            cv.dnn.readNet(self._modelPath)
        )

        self._inputSize = tuple(inputSize)  # (w, h)
        # BUGFIX: inputSize is (w, h) — the original assigned inputSize[0]
        # (width) to _inputHeight and inputSize[1] (height) to _inputWidth.
        self._inputWidth = inputSize[0]
        self._inputHeight = inputSize[1]
        self._binaryThreshold = binaryThreshold
        self._polygonThreshold = polygonThreshold
        self._maxCandidates = maxCandidates
        self._unclipRatio = unclipRatio
        self._backendId = backendId
        self._targetId = targetId

        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

        self._model.setBinaryThreshold(self._binaryThreshold)
        self._model.setPolygonThreshold(self._polygonThreshold)
        self._model.setUnclipRatio(self._unclipRatio)
        self._model.setMaxCandidates(self._maxCandidates)

        self._model.setInputSize(self._inputSize)
        # PP-OCR normalization: (x / 255 - mean) / std, expressed through
        # cv.dnn's input mean and per-channel scale.
        self._model.setInputMean((123.675, 116.28, 103.53))
        self._model.setInputScale(1.0 / 255.0 / np.array([0.229, 0.224, 0.225]))

    @property
    def name(self):
        """Human-readable model identifier (the class name)."""
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        """Switch the cv.dnn compute backend/target after construction."""
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

    def setInputSize(self, input_size):
        """Update the network input size ((w, h)) and re-apply preprocessing."""
        self._inputSize = tuple(input_size)
        self._model.setInputSize(self._inputSize)
        self._model.setInputMean((123.675, 116.28, 103.53))
        self._model.setInputScale(1.0 / 255.0 / np.array([0.229, 0.224, 0.225]))

    def infer(self, image):
        """Run text detection on one image.

        The image must already match the preset input size exactly
        (height == inputSize[1], width == inputSize[0]).

        Returns:
            The result of TextDetectionModel_DB.detect(): detected text
            polygons with their confidences.
        """
        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])

        return self._model.detect(image)