fengyuentau committed on
Commit 42310ef · 1 Parent(s): af1afb3

Benchmark framework implementation and 3 models added:

* benchmark framework: benchmarks based on configs

* added impl and benchmark for YuNet (face detection)

* added impl and benchmark for DB (text detection)

* added impl and benchmark for CRNN (text recognition)

.gitignore CHANGED
@@ -1,6 +1,5 @@
 *.pyc
-
-benchmark/data
-benchmark/data/**
+**/__pycache__
+**/__pycache__/**
 
 .vscode
README.md CHANGED
@@ -2,6 +2,38 @@
 
 A zoo for models tuned for OpenCV DNN with benchmarks on different platforms.
 
+Guidelines:
+- To clone this repo, please install [git-lfs](https://git-lfs.github.com/), run `git lfs install` and use `git lfs clone https://github.com/opencv/opencv_zoo`.
+- To run benchmarks on your own hardware, please refer to [benchmark/README](./benchmark/README.md).
+
+## Models & Benchmarks
+
+Hardware Setup:
+- `CPU x86_64`: Intel Core i7-5930K @ 3.50 GHz, 6 cores, 12 threads.
+- `CPU ARM`: Raspberry Pi 4B, BCM2711B0 @ 1.5 GHz (Cortex-A72), 4 cores, 4 threads.
+<!--
+- `GPU CUDA`: NVIDIA Jetson Nano B01, 128-core Maxwell, quad-core ARM A57 @ 1.43 GHz.
+-->
+
+***Important Notes***:
+- The times in the following tables cover the elapsed time from preprocessing (resize excluded), through a forward pass of the network, to postprocessing that produces final results.
+- Each reported time is the average of 100 runs.
+- See [benchmark/config](./benchmark/config) for details on benchmarking different models.
+
+<!--
+| Model | Input Size | CPU x86_64 (ms) | CPU ARM (ms) | GPU CUDA (ms) |
+|-------|------------|-----------------|--------------|---------------|
+| [YuNet](./models/face_detection_yunet) | 160x120 | 2.17 | 8.87 | 14.95 |
+| [DB](./models/text_detection_db) | 640x480 | 148.65 | 2759.88 | 218.25 |
+| [CRNN](./models/text_recognition_crnn) | 100x32 | 23.23 | 235.87 | 195.20 |
+-->
+| Model | Input Size | CPU x86_64 (ms) | CPU ARM (ms) |
+|-------|------------|-----------------|--------------|
+| [YuNet](./models/face_detection_yunet) | 160x120 | 2.17 | 8.87 |
+| [DB](./models/text_detection_db) | 640x480 | 148.65 | 2759.88 |
+| [CRNN](./models/text_recognition_crnn) | 100x32 | 23.23 | 235.87 |
+
+
 ## License
 
-OpenCV Zoo is licensed under the [Apache 2.0 license](./LICENCE). Please refer to the licenses of different models for model weights.
+OpenCV Zoo is licensed under the [Apache 2.0 license](./LICENSE). Please refer to the licenses of individual models for their model weights.
benchmark/README.md ADDED
@@ -0,0 +1,32 @@
+# OpenCV Zoo Benchmark
+
+Benchmarking different models in the zoo.
+
+Data for benchmarking will be downloaded and loaded into [data](./data) based on the given config.
+
+Time is measured from preprocessing (resize excluded), through a forward pass of the network, to postprocessing that produces final results. The reported time is averaged over 100 runs.
+
+## Preparation
+
+1. Install `python >= 3.6`.
+2. Install dependencies: `pip install -r requirements.txt`.
+
+## Benchmarking
+
+Run the following command to benchmark on a given config:
+
+```shell
+PYTHONPATH=.. python benchmark.py --cfg ./config/face_detection_yunet.yaml
+```
+
+If you are a Windows user and want to run it in CMD/PowerShell, use these commands instead:
+```shell
+set PYTHONPATH=..
+python benchmark.py --cfg ./config/face_detection_yunet.yaml
+```
+<!--
+Omit `--cfg` to benchmark all included models:
+```shell
+PYTHONPATH=.. python benchmark.py
+```
+-->
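
The time measurement described above is implemented by the `Timer`/`_run` pair in `benchmark.py` below. For orientation, it boils down to this pattern (a minimal sketch; `measure_average_ms` is an illustrative name, and `model` is assumed to expose `infer()` like the wrappers in this commit):

```python
import cv2 as cv

def measure_average_ms(model, img, repeat=100):
    # Per-call wall time via cv.TickMeter, averaged over `repeat` runs,
    # mirroring Timer.start/stop and Benchmark._run below.
    tm = cv.TickMeter()
    times = []
    for _ in range(repeat):
        tm.start()
        model.infer(img)  # preprocess (resize excluded) + forward + postprocess
        tm.stop()
        times.append(tm.getTimeMilli())
        tm.reset()
    return sum(times) / len(times)
```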
benchmark/benchmark.py ADDED
@@ -0,0 +1,182 @@
+import os
+import argparse
+
+import yaml
+import tqdm
+import numpy as np
+import cv2 as cv
+
+from models import MODELS
+from download import Downloader
+
+parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
+parser.add_argument('--cfg', '-c', type=str,
+                    help='Benchmarking on the given config.')
+args = parser.parse_args()
+
+class Timer:
+    def __init__(self):
+        self._tm = cv.TickMeter()
+
+        self._time_record = []
+        self._average_time = 0
+        self._calls = 0
+
+    def start(self):
+        self._tm.start()
+
+    def stop(self):
+        self._tm.stop()
+        self._calls += 1
+        self._time_record.append(self._tm.getTimeMilli())
+        self._average_time = sum(self._time_record) / self._calls
+        self._tm.reset()
+
+    def reset(self):
+        self._time_record = []
+        self._average_time = 0
+        self._calls = 0
+
+    def getAverageTime(self):
+        return self._average_time
+
+
+class Benchmark:
+    def __init__(self, **kwargs):
+        self._fileList = kwargs.pop('fileList', None)
+        assert self._fileList, 'fileList cannot be empty'
+
+        backend_id = kwargs.pop('backend', 'default')
+        available_backends = dict(
+            default=cv.dnn.DNN_BACKEND_DEFAULT,
+            # halide=cv.dnn.DNN_BACKEND_HALIDE,
+            # inference_engine=cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
+            opencv=cv.dnn.DNN_BACKEND_OPENCV,
+            # vkcom=cv.dnn.DNN_BACKEND_VKCOM,
+            cuda=cv.dnn.DNN_BACKEND_CUDA
+        )
+        self._backend = available_backends[backend_id]
+
+        target_id = kwargs.pop('target', 'cpu')
+        available_targets = dict(
+            cpu=cv.dnn.DNN_TARGET_CPU,
+            # opencl=cv.dnn.DNN_TARGET_OPENCL,
+            # opencl_fp16=cv.dnn.DNN_TARGET_OPENCL_FP16,
+            # myriad=cv.dnn.DNN_TARGET_MYRIAD,
+            # vulkan=cv.dnn.DNN_TARGET_VULKAN,
+            # fpga=cv.dnn.DNN_TARGET_FPGA,
+            cuda=cv.dnn.DNN_TARGET_CUDA,
+            cuda_fp16=cv.dnn.DNN_TARGET_CUDA_FP16,
+            # hddl=cv.dnn.DNN_TARGET_HDDL
+        )
+        self._target = available_targets[target_id]
+
+        self._sizes = kwargs.pop('sizes', None)
+        self._repeat = kwargs.pop('repeat', 100)
+        self._parentPath = kwargs.pop('parentPath', 'benchmark/data')
+        self._useGroundTruth = kwargs.pop('useDetectionLabel', False)  # if enabled, 'sizes' will not work
+        assert (self._sizes and not self._useGroundTruth) or (not self._sizes and self._useGroundTruth), 'Exactly one of \'sizes\' and \'useDetectionLabel\' must be given.'
+
+        self._timer = Timer()
+        self._benchmark_results = {fileName: dict() for fileName in self._fileList}  # one dict per file (dict.fromkeys would share a single dict object)
+
+        if self._useGroundTruth:
+            self.loadLabel()
+
+    def loadLabel(self):
+        self._labels = dict.fromkeys(self._fileList, None)
+        for imgName in self._fileList:
+            self._labels[imgName] = np.loadtxt(os.path.join(self._parentPath, '{}.txt'.format(imgName[:-4])))
+
+    def run(self, model):
+        model.setBackend(self._backend)
+        model.setTarget(self._target)
+
+        for imgName in self._fileList:
+            img = cv.imread(os.path.join(self._parentPath, imgName))
+            if self._useGroundTruth:
+                for idx, gt in enumerate(self._labels[imgName]):
+                    self._benchmark_results[imgName]['gt{}'.format(idx)] = self._run(
+                        model,
+                        img,
+                        gt,
+                        pbar_msg=' {}, gt{}'.format(imgName, idx)
+                    )
+            else:
+                if self._sizes is None:
+                    h, w, _ = img.shape
+                    model.setInputSize([w, h])
+                    self._benchmark_results[imgName][str([w, h])] = self._run(
+                        model,
+                        img,
+                        pbar_msg=' {}, original size {}'.format(imgName, str([w, h]))
+                    )
+                else:
+                    for size in self._sizes:
+                        imgResized = cv.resize(img, size)
+                        model.setInputSize(size)
+                        self._benchmark_results[imgName][str(size)] = self._run(
+                            model,
+                            imgResized,
+                            pbar_msg=' {}, size {}'.format(imgName, str(size))
+                        )
+
+    def printResults(self):
+        print(' Results:')
+        for imgName, results in self._benchmark_results.items():
+            print(' image: {}'.format(imgName))
+            total_latency = 0
+            for key, latency in results.items():
+                total_latency += latency
+                print(' {}, latency: {:.4f} ms'.format(key, latency))
+            print(' Average latency: {:.4f} ms'.format(total_latency / len(results)))
+
+    def _run(self, model, *args, **kwargs):
+        self._timer.reset()
+        pbar = tqdm.tqdm(range(self._repeat))
+        for _ in pbar:
+            pbar.set_description(kwargs.get('pbar_msg', None))
+
+            self._timer.start()
+            results = model.infer(*args)
+            self._timer.stop()
+        return self._timer.getAverageTime()
+
+
+def build_from_cfg(cfg, registery):
+    obj_name = cfg.pop('name')
+    obj = registery.get(obj_name)
+    return obj(**cfg)
+
+def prepend_pythonpath(cfg, key1, key2):
+    pythonpath = os.environ['PYTHONPATH']
+    if cfg[key1][key2].startswith('/'):
+        return
+    cfg[key1][key2] = os.path.join(pythonpath, cfg[key1][key2])
+
+if __name__ == '__main__':
+    assert args.cfg.endswith('yaml'), 'Currently support configs of yaml format only.'
+    with open(args.cfg, 'r') as f:
+        cfg = yaml.safe_load(f)
+
+    # prepend PYTHONPATH to each path
+    prepend_pythonpath(cfg, key1='Data', key2='parentPath')
+    prepend_pythonpath(cfg, key1='Benchmark', key2='parentPath')
+    prepend_pythonpath(cfg, key1='Model', key2='modelPath')
+
+
+    # Download data if it does not exist
+    print('Loading data:')
+    downloader = Downloader(**cfg['Data'])
+    downloader.get()
+
+    # Instantiate benchmarking
+    benchmark = Benchmark(**cfg['Benchmark'])
+
+    # Instantiate model
+    model = build_from_cfg(cfg=cfg['Model'], registery=MODELS)
+
+    # Run benchmarking
+    print('Benchmarking {}:'.format(model.name))
+    benchmark.run(model)
+    benchmark.printResults()
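
The `__main__` block above also serves as a template for driving the framework from Python. A hedged sketch, assuming the same layout as the CLI invocation (`PYTHONPATH` pointing at the repo root so `models` imports, and `benchmark.py` importable as a module; note its argument parser runs at import time):

```python
import yaml
from models import MODELS                        # registry built in models/__init__.py
from benchmark import Benchmark, build_from_cfg  # assumes benchmark.py is importable

with open('config/face_detection_yunet.yaml', 'r') as f:
    cfg = yaml.safe_load(f)

# benchmark.py normally calls prepend_pythonpath() on the paths in cfg first.
model = build_from_cfg(cfg=cfg['Model'], registery=MODELS)  # 'name' selects the class
benchmark = Benchmark(**cfg['Benchmark'])
benchmark.run(model)
benchmark.printResults()
```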
benchmark/config/face_detection_yunet.yaml ADDED
@@ -0,0 +1,28 @@
+Data:
+  name: "Images for Face Detection"
+  url: "https://drive.google.com/u/0/uc?id=1lOAliAIeOv4olM65YDzE55kn6XjiX2l6&export=download"
+  sha: "0ba67a9cfd60f7fdb65cdb7c55a1ce76c1193df1"
+  filename: "face_detection.zip"
+  parentPath: "benchmark/data"
+
+Benchmark:
+  name: "Face Detection Benchmark"
+  parentPath: "benchmark/data/face_detection"
+  fileList:
+    - "group.jpg"
+    - "concerts.jpg"
+    - "dance.jpg"
+  backend: "default"
+  target: "cpu"
+  sizes: # [w, h]; omit to run at the original scale
+    - [160, 120]
+    - [640, 480]
+  repeat: 100 # default 100
+
+Model:
+  name: "YuNet"
+  modelPath: "models/face_detection_yunet/face_detection_yunet.onnx"
+  confThreshold: 0.6
+  nmsThreshold: 0.3
+  topK: 5000
+  keepTopK: 750
benchmark/config/text_detection_db.yaml ADDED
@@ -0,0 +1,27 @@
+Data:
+  name: "Images for Text Detection"
+  url: "https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download"
+  sha: "a40cf095ceb77159ddd2a5902f3b4329696dd866"
+  filename: "text.zip"
+  parentPath: "benchmark/data"
+
+Benchmark:
+  name: "Text Detection Benchmark"
+  parentPath: "benchmark/data/text"
+  fileList:
+    - "1.jpg"
+    - "2.jpg"
+    - "3.jpg"
+  backend: "default"
+  target: "cpu"
+  sizes: # [w, h]; omit to run at the original scale
+    - [640, 480]
+  repeat: 100
+
+Model:
+  name: "DB"
+  modelPath: "models/text_detection_db/text_detection_db.onnx"
+  binaryThreshold: 0.3
+  polygonThreshold: 0.5
+  maxCandidates: 200
+  unclipRatio: 2.0
benchmark/config/text_recognition_crnn.yaml ADDED
@@ -0,0 +1,22 @@
+Data:
+  name: "Images for Text Detection"
+  url: "https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download"
+  sha: "a40cf095ceb77159ddd2a5902f3b4329696dd866"
+  filename: "text.zip"
+  parentPath: "benchmark/data"
+
+Benchmark:
+  name: "Text Recognition Benchmark"
+  parentPath: "benchmark/data/text"
+  fileList:
+    - "1.jpg"
+    - "2.jpg"
+    - "3.jpg"
+  backend: "default"
+  target: "cpu"
+  useDetectionLabel: True
+  repeat: 100
+
+Model:
+  name: "CRNN"
+  modelPath: "models/text_recognition_crnn/text_recognition_crnn.onnx"
benchmark/data/.gitignore ADDED
@@ -0,0 +1,2 @@
+*
+!.gitignore
benchmark/download.py ADDED
@@ -0,0 +1,163 @@
+import hashlib
+import os
+import sys
+import tarfile
+import zipfile
+import requests
+import os.path as osp
+
+from urllib.request import urlopen
+from urllib.parse import urlparse
+
+
+class Downloader:
+    MB = 1024*1024
+    BUFSIZE = 10*MB
+
+    def __init__(self, **kwargs):
+        self._name = kwargs.pop('name')
+        self._url = kwargs.pop('url', None)
+        self._filename = kwargs.pop('filename')
+        self._sha = kwargs.pop('sha', None)
+        self._saveTo = kwargs.pop('saveTo', './data')
+        self._extractTo = kwargs.pop('extractTo', './data')
+
+    def __str__(self):
+        return 'Downloader for <{}>'.format(self._name)
+
+    def printRequest(self, r):
+        def getMB(r):
+            d = dict(r.info())
+            for c in ['content-length', 'Content-Length']:
+                if c in d:
+                    return int(d[c]) / self.MB
+            return '<unknown>'
+        print(' {} {} [{} Mb]'.format(r.getcode(), r.msg, getMB(r)))
+
+    def verifyHash(self):
+        if not self._sha:
+            return False
+        sha = hashlib.sha1()
+        try:
+            with open(osp.join(self._saveTo, self._filename), 'rb') as f:
+                while True:
+                    buf = f.read(self.BUFSIZE)
+                    if not buf:
+                        break
+                    sha.update(buf)
+            if self._sha != sha.hexdigest():
+                print(' actual {}'.format(sha.hexdigest()))
+                print(' expect {}'.format(self._sha))
+            return self._sha == sha.hexdigest()
+        except Exception as e:
+            print(' catch {}'.format(e))  # returns None, treated as a failed check
+
+    def get(self):
+        if self.verifyHash():
+            print(' hash match - skipping download')
+        else:
+            basedir = os.path.dirname(self._saveTo)
+            if basedir and not os.path.exists(basedir):
+                print(' creating directory: ' + basedir)
+                os.makedirs(basedir, exist_ok=True)
+
+            print(' hash check failed - downloading')
+            if 'drive.google.com' in self._url:
+                urlquery = urlparse(self._url).query.split('&')
+                for q in urlquery:
+                    if 'id=' in q:
+                        gid = q[3:]
+                sz = GDrive(gid)(osp.join(self._saveTo, self._filename))
+                print(' size = %.2f Mb' % (sz / (1024.0 * 1024)))
+            else:
+                print(' get {}'.format(self._url))
+                self.download()
+
+            # Verify hash after download
+            print(' done')
+            print(' file {}'.format(self._filename))
+            if self.verifyHash():
+                print(' hash match - extracting')
+            else:
+                print(' hash check failed - exiting')
+
+        # Extract
+        if '.zip' in self._filename:
+            print(' extracting - ', end='')
+            self.extract()
+            print('done')
+
+        return True
+
+    def download(self):
+        try:
+            r = urlopen(self._url, timeout=60)
+            self.printRequest(r)
+            self.save(r)
+        except Exception as e:
+            print(' catch {}'.format(e))
+
+    def extract(self):
+        fileLocation = os.path.join(self._saveTo, self._filename)
+        try:
+            if self._filename.endswith('.zip'):
+                with zipfile.ZipFile(fileLocation) as f:
+                    for member in f.namelist():
+                        path = osp.join(self._extractTo, member)
+                        if osp.exists(path) or osp.isfile(path):
+                            continue
+                        else:
+                            f.extract(member, self._extractTo)
+        except Exception as e:
+            print(' catch {}'.format(e))
+
+    def save(self, r):
+        with open(osp.join(self._saveTo, self._filename), 'wb') as f:  # write under saveTo so verifyHash reads the same path
+            print(' progress ', end='')
+            sys.stdout.flush()
+            while True:
+                buf = r.read(self.BUFSIZE)
+                if not buf:
+                    break
+                f.write(buf)
+                print('>', end='')
+                sys.stdout.flush()
+
+
+def GDrive(gid):
+    def download_gdrive(dst):
+        session = requests.Session()  # re-use cookies
+
+        URL = "https://docs.google.com/uc?export=download"
+        response = session.get(URL, params = { 'id' : gid }, stream = True)
+
+        def get_confirm_token(response):  # in case of large files
+            for key, value in response.cookies.items():
+                if key.startswith('download_warning'):
+                    return value
+            return None
+        token = get_confirm_token(response)
+
+        if token:
+            params = { 'id' : gid, 'confirm' : token }
+            response = session.get(URL, params = params, stream = True)
+
+        BUFSIZE = 1024 * 1024
+        PROGRESS_SIZE = 10 * 1024 * 1024
+
+        sz = 0
+        progress_sz = PROGRESS_SIZE
+        with open(dst, "wb") as f:
+            for chunk in response.iter_content(BUFSIZE):
+                if not chunk:
+                    continue  # keep-alive
+
+                f.write(chunk)
+                sz += len(chunk)
+                if sz >= progress_sz:
+                    progress_sz += PROGRESS_SIZE
+                    print('>', end='')
+                    sys.stdout.flush()
+        print('')
+        return sz
+    return download_gdrive
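
`Downloader` can also be exercised on its own. A minimal sketch reusing the `Data` keys from `config/face_detection_yunet.yaml` above (`saveTo`/`extractTo` default to `./data`, so this assumes running from `benchmark/`):

```python
from download import Downloader

downloader = Downloader(
    name='Images for Face Detection',
    url='https://drive.google.com/u/0/uc?id=1lOAliAIeOv4olM65YDzE55kn6XjiX2l6&export=download',
    sha='0ba67a9cfd60f7fdb65cdb7c55a1ce76c1193df1',
    filename='face_detection.zip',
)
downloader.get()  # skips the download when the SHA-1 already matches
```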
benchmark/requirements.txt ADDED
@@ -0,0 +1,5 @@
+numpy==1.21.2
+opencv-python==4.5.3.56
+tqdm
+pyyaml
+requests
models/__init__.py ADDED
@@ -0,0 +1,19 @@
+from .face_detection_yunet.yunet import YuNet
+from .text_detection_db.db import DB
+from .text_recognition_crnn.crnn import CRNN
+
+class Registery:
+    def __init__(self, name):
+        self._name = name
+        self._dict = dict()
+
+    def get(self, key):
+        return self._dict[key]
+
+    def register(self, item):
+        self._dict[item.__name__] = item
+
+MODELS = Registery('Models')
+MODELS.register(YuNet)
+MODELS.register(DB)
+MODELS.register(CRNN)
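
The registry is how `benchmark.py` turns the `Model.name` field of a config into a class. Adding a model to the zoo follows the same pattern; a hypothetical sketch (`MyModel` is illustrative, not part of this commit):

```python
# In models/__init__.py, after the registrations above:
class MyModel:
    def __init__(self, modelPath):
        self._modelPath = modelPath  # plus cv.dnn.readNet(...), thresholds, etc.

    @property
    def name(self):
        return self.__class__.__name__

    # The benchmark also expects setBackend, setTarget, setInputSize and infer.

MODELS.register(MyModel)           # now reachable via name: "MyModel" in a config
model_cls = MODELS.get('MyModel')  # what build_from_cfg does under the hood
```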
models/face_detection_yunet/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Shiqi Yu <[email protected]>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
models/face_detection_yunet/README.md ADDED
@@ -0,0 +1,22 @@
+# YuNet
+
+YuNet is a light-weight, fast and accurate face detection model, which achieves 0.834 (AP_easy), 0.824 (AP_medium) and 0.708 (AP_hard) on the WIDER Face validation set.
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# detect on camera input
+python demo.py
+# detect on an image
+python demo.py --input /path/to/image
+```
+
+## License
+
+All files in this directory are licensed under the [MIT License](./LICENSE).
+
+## Reference
+
+- https://github.com/ShiqiYu/libfacedetection
+- https://github.com/ShiqiYu/libfacedetection.train
models/face_detection_yunet/demo.py ADDED
@@ -0,0 +1,122 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import argparse
+
+import numpy as np
+import cv2 as cv
+
+from yunet import YuNet
+
+def str2bool(v):
+    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
+        return True
+    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
+        return False
+    else:
+        raise NotImplementedError
+
+parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
+parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet.onnx', help='Path to the model.')
+parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
+parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
+parser.add_argument('--keep_top_k', type=int, default=750, help='Keep keep_top_k bounding boxes after NMS.')
+parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')  # str2bool so '--save false' is falsy
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+args = parser.parse_args()
+
+def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
+    output = image.copy()
+    landmark_color = [
+        (255,   0,   0),  # right eye
+        (  0,   0, 255),  # left eye
+        (  0, 255,   0),  # nose tip
+        (255,   0, 255),  # right mouth corner
+        (  0, 255, 255)   # left mouth corner
+    ]
+
+    if fps is not None:
+        cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
+
+    for det in results:
+        bbox = det[0:4].astype(np.int32)
+        cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)
+
+        conf = det[-1]
+        cv.putText(output, '{:.4f}'.format(conf), (bbox[0], bbox[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, text_color)
+
+        landmarks = det[4:14].astype(np.int32).reshape((5,2))
+        for idx, landmark in enumerate(landmarks):
+            cv.circle(output, landmark, 2, landmark_color[idx], 2)
+
+    return output
+
+if __name__ == '__main__':
+    # Instantiate YuNet
+    model = YuNet(modelPath=args.model,
+                  inputSize=[320, 320],
+                  confThreshold=args.conf_threshold,
+                  nmsThreshold=args.nms_threshold,
+                  topK=args.top_k,
+                  keepTopK=args.keep_top_k)
+
+    # If input is an image
+    if args.input is not None:
+        image = cv.imread(args.input)
+        h, w, _ = image.shape
+
+        # Inference
+        model.setInputSize([w, h])
+        results = model.infer(image)
+
+        # Print results
+        print('{} faces detected.'.format(results.shape[0]))
+        for idx, det in enumerate(results):
+            print('{}: [{:.0f}, {:.0f}] [{:.0f}, {:.0f}], {:.2f}'.format(
+                idx, det[0], det[1], det[2], det[3], det[-1])
+            )
+
+        # Draw results on the input image
+        image = visualize(image, results)
+
+        # Save results if save is true
+        if args.save:
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', image)
+
+        # Visualize results in a new window
+        if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, image)
+            cv.waitKey(0)
+    else:  # Omit input to call default camera
+        deviceId = 0
+        cap = cv.VideoCapture(deviceId)
+        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
+        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
+        model.setInputSize([w, h])
+
+        tm = cv.TickMeter()
+        while cv.waitKey(1) < 0:
+            hasFrame, frame = cap.read()
+            if not hasFrame:
+                print('No frames grabbed!')
+                break
+
+            # Inference
+            tm.start()
+            results = model.infer(frame)  # an N x 15 ndarray of detections
+            tm.stop()
+
+            # Draw results on the input image
+            frame = visualize(frame, results, fps=tm.getFPS())
+
+            # Visualize results in a new window
+            cv.imshow('YuNet Demo', frame)
+
+            tm.reset()
models/face_detection_yunet/yunet.py ADDED
@@ -0,0 +1,149 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+from itertools import product
+
+import numpy as np
+import cv2 as cv
+
+class YuNet:
+    def __init__(self, modelPath, inputSize=[320, 320], confThreshold=0.6, nmsThreshold=0.3, topK=5000, keepTopK=750):
+        self._modelPath = modelPath
+        self._model = cv.dnn.readNet(self._modelPath)
+
+        self._inputNames = ''
+        self._outputNames = ['loc', 'conf', 'iou']
+        self._inputSize = inputSize  # [w, h]
+        self._confThreshold = confThreshold
+        self._nmsThreshold = nmsThreshold
+        self._topK = topK
+        self._keepTopK = keepTopK
+
+        self._min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
+        self._steps = [8, 16, 32, 64]
+        self._variance = [0.1, 0.2]
+
+        # Generate priors
+        self._priorGen()
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def setBackend(self, backend):
+        self._model.setPreferableBackend(backend)
+
+    def setTarget(self, target):
+        self._model.setPreferableTarget(target)
+
+    def setInputSize(self, input_size):
+        self._inputSize = input_size  # [w, h]
+
+        # Regenerate priors
+        self._priorGen()
+
+    def _preprocess(self, image):
+        return cv.dnn.blobFromImage(image)
+
+    def infer(self, image):
+        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
+        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
+
+        # Preprocess
+        inputBlob = self._preprocess(image)
+
+        # Forward
+        self._model.setInput(inputBlob, self._inputNames)
+        outputBlob = self._model.forward(self._outputNames)
+
+        # Postprocess
+        results = self._postprocess(outputBlob)
+
+        return results
+
+    def _postprocess(self, outputBlob):
+        # Decode
+        dets = self._decode(outputBlob)
+
+        # NMS
+        keepIdx = cv.dnn.NMSBoxes(
+            bboxes=dets[:, 0:4].tolist(),
+            scores=dets[:, -1].tolist(),
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK
+        )  # box_num x class_num
+        if len(keepIdx) > 0:
+            dets = dets[keepIdx]
+            dets = np.squeeze(dets, axis=1)
+            return dets[:self._keepTopK]
+        else:
+            return np.empty(shape=(0, 15))
+
+    def _priorGen(self):
+        w, h = self._inputSize
+        feature_map_2th = [int(int((h + 1) / 2) / 2),
+                           int(int((w + 1) / 2) / 2)]
+        feature_map_3th = [int(feature_map_2th[0] / 2),
+                           int(feature_map_2th[1] / 2)]
+        feature_map_4th = [int(feature_map_3th[0] / 2),
+                           int(feature_map_3th[1] / 2)]
+        feature_map_5th = [int(feature_map_4th[0] / 2),
+                           int(feature_map_4th[1] / 2)]
+        feature_map_6th = [int(feature_map_5th[0] / 2),
+                           int(feature_map_5th[1] / 2)]
+
+        feature_maps = [feature_map_3th, feature_map_4th,
+                        feature_map_5th, feature_map_6th]
+
+        priors = []
+        for k, f in enumerate(feature_maps):
+            min_sizes = self._min_sizes[k]
+            for i, j in product(range(f[0]), range(f[1])):  # i->h, j->w
+                for min_size in min_sizes:
+                    s_kx = min_size / w
+                    s_ky = min_size / h
+
+                    cx = (j + 0.5) * self._steps[k] / w
+                    cy = (i + 0.5) * self._steps[k] / h
+
+                    priors.append([cx, cy, s_kx, s_ky])
+        self.priors = np.array(priors, dtype=np.float32)
+
+    def _decode(self, outputBlob):
+        loc, conf, iou = outputBlob
+        # get score
+        cls_scores = conf[:, 1]
+        iou_scores = iou[:, 0]
+        # clamp
+        _idx = np.where(iou_scores < 0.)
+        iou_scores[_idx] = 0.
+        _idx = np.where(iou_scores > 1.)
+        iou_scores[_idx] = 1.
+        scores = np.sqrt(cls_scores * iou_scores)
+        scores = scores[:, np.newaxis]
+
+        scale = np.array(self._inputSize)
+
+        # get bboxes
+        bboxes = np.hstack((
+            (self.priors[:, 0:2] + loc[:, 0:2] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 2:4] * np.exp(loc[:, 2:4] * self._variance)) * scale
+        ))
+        # (x_c, y_c, w, h) -> (x1, y1, w, h)
+        bboxes[:, 0:2] -= bboxes[:, 2:4] / 2
+
+        # get landmarks
+        landmarks = np.hstack((
+            (self.priors[:, 0:2] + loc[:,  4: 6] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:,  6: 8] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:,  8:10] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:, 10:12] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:, 12:14] * self._variance[0] * self.priors[:, 2:4]) * scale
+        ))
+
+        dets = np.hstack((bboxes, landmarks, scores))
+        return dets
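
As `_decode` and `_postprocess` show, `infer()` returns an N x 15 array: 4 bbox values (x, y, w, h), 10 landmark coordinates (5 points) and a score in the last column. A small consumption sketch (the image path is illustrative; run from this directory as in `demo.py`):

```python
import cv2 as cv
from yunet import YuNet

model = YuNet(modelPath='face_detection_yunet.onnx')
image = cv.imread('/path/to/image.jpg')
h, w, _ = image.shape
model.setInputSize([w, h])   # regenerates priors for this size

dets = model.infer(image)    # shape (N, 15); (0, 15) if nothing survives NMS
for det in dets:
    x, y, bw, bh = det[0:4]
    landmarks = det[4:14].reshape(5, 2)  # eyes, nose tip, mouth corners
    score = det[-1]
```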
models/text_detection_db/LICENSE ADDED
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
models/text_detection_db/README.md ADDED
@@ -0,0 +1,25 @@
+# DB
+
+Real-time Scene Text Detection with Differentiable Binarization
+
+`text_detection_db.onnx` is trained on the [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), so it can detect both English and Chinese text instances. It is obtained from [here](https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html) and renamed from `DB_TD500_resnet18.onnx`.
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# detect on camera input
+python demo.py
+# detect on an image
+python demo.py --input /path/to/image
+```
+
+## License
+
+All files in this directory are licensed under the [Apache 2.0 License](./LICENSE).
+
+## Reference
+
+- https://arxiv.org/abs/1911.08947
+- https://github.com/MhLiao/DB
+- https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
models/text_detection_db/db.py ADDED
@@ -0,0 +1,50 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import numpy as np
+import cv2 as cv
+
+class DB:
+    def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0):
+        self._modelPath = modelPath
+        self._model = cv.dnn_TextDetectionModel_DB(
+            cv.dnn.readNet(self._modelPath)
+        )
+
+        self._inputSize = tuple(inputSize)  # (w, h)
+        self._inputWidth = inputSize[0]     # inputSize is [w, h]
+        self._inputHeight = inputSize[1]
+        self._binaryThreshold = binaryThreshold
+        self._polygonThreshold = polygonThreshold
+        self._maxCandidates = maxCandidates
+        self._unclipRatio = unclipRatio
+
+        self._model.setBinaryThreshold(self._binaryThreshold)
+        self._model.setPolygonThreshold(self._polygonThreshold)
+        self._model.setUnclipRatio(self._unclipRatio)
+        self._model.setMaxCandidates(self._maxCandidates)
+
+        self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def setBackend(self, backend):
+        self._model.setPreferableBackend(backend)
+
+    def setTarget(self, target):
+        self._model.setPreferableTarget(target)
+
+    def setInputSize(self, input_size):
+        self._inputSize = tuple(input_size)
+        self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
+
+    def infer(self, image):
+        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
+        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
+
+        return self._model.detect(image)
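
`DB` delegates to OpenCV's `cv.dnn_TextDetectionModel_DB`, so `infer()` returns what `detect()` returns: a tuple that the demo below unpacks as `results[0]` (polygons) and `results[1]` (confidences). A minimal sketch (image path illustrative; the input must already match the configured size, as the asserts above enforce):

```python
import cv2 as cv
from db import DB  # run from this directory, as in demo.py

model = DB(modelPath='text_detection_db.onnx', inputSize=[736, 736])
image = cv.resize(cv.imread('/path/to/image.jpg'), (736, 736))

polygons, confidences = model.infer(image)
for quad, score in zip(polygons, confidences):
    print(quad, score)  # quad: four corner points of a detected text region
```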
models/text_detection_db/demo.py ADDED
@@ -0,0 +1,107 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import argparse
+
+import numpy as np
+import cv2 as cv
+
+from db import DB
+
+def str2bool(v):
+    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
+        return True
+    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
+        return False
+    else:
+        raise NotImplementedError
+
+parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
+parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_detection_db.onnx', help='Path to the model.')
+parser.add_argument('--width', type=int, default=736,
+                    help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
+parser.add_argument('--height', type=int, default=736,
+                    help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
+parser.add_argument('--binary_threshold', type=float, default=0.3, help='Threshold of the binary map.')
+parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Threshold of polygons.')
+parser.add_argument('--max_candidates', type=int, default=200, help='Max candidates of polygons.')
+parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help='The unclip ratio of the detected text region, which determines the output size.')
+parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')  # str2bool so '--save false' is falsy
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+args = parser.parse_args()
+
+def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
+    output = image.copy()
+
+    if fps is not None:
+        cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
+
+    pts = np.array(results[0])
+    output = cv.polylines(output, pts, isClosed, box_color, thickness)
+
+    return output
+
+if __name__ == '__main__':
+    # Instantiate DB
+    model = DB(modelPath=args.model,
+               inputSize=[args.width, args.height],
+               binaryThreshold=args.binary_threshold,
+               polygonThreshold=args.polygon_threshold,
+               maxCandidates=args.max_candidates,
+               unclipRatio=args.unclip_ratio
+    )
+
+    # If input is an image
+    if args.input is not None:
+        image = cv.imread(args.input)
+        image = cv.resize(image, [args.width, args.height])
+
+        # Inference
+        results = model.infer(image)
+
+        # Print results
+        print('{} texts detected.'.format(len(results[0])))
+        for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
+            print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score[0]))
+
+        # Draw results on the input image
+        image = visualize(image, results)
+
+        # Save results if save is true
+        if args.save:
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', image)
+
+        # Visualize results in a new window
+        if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, image)
+            cv.waitKey(0)
+    else:  # Omit input to call default camera
+        deviceId = 0
+        cap = cv.VideoCapture(deviceId)
+
+        tm = cv.TickMeter()
+        while cv.waitKey(1) < 0:
+            hasFrame, frame = cap.read()
+            if not hasFrame:
+                print('No frames grabbed!')
+                break
+
+            frame = cv.resize(frame, [args.width, args.height])
+            # Inference
+            tm.start()
+            results = model.infer(frame)  # results is a tuple
+            tm.stop()
+
+            # Draw results on the input image
+            frame = visualize(frame, results, fps=tm.getFPS())
+
+            # Visualize results in a new window
+            cv.imshow('{} Demo'.format(model.name), frame)
+
+            tm.reset()
models/text_recognition_crnn/LICENSE ADDED
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
models/text_recognition_crnn/README.md ADDED
@@ -0,0 +1,29 @@
+ # CRNN
+
+ An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition
+
+ `text_recognition_crnn.onnx` is trained using the code from https://github.com/zihaomu/deep-text-recognition-benchmark; it can only recognize English words. It is obtained from https://drive.google.com/drive/folders/1cTbQ3nuZG-EKWak6emD_s8_hHXWz7lAr and renamed from `CRNN_VGG_BiLSTM_CTC.onnx`. Visit https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html for more information.
+
+ ## Demo
+
+ ***NOTE***: This demo uses [text_detection_db](../text_detection_db) as the text detector.
+
+ Run the following command to try the demo:
+ ```shell
+ # detect on camera input
+ python demo.py
+ # detect on an image
+ python demo.py --input /path/to/image
+ ```
+
+ ## License
+
+ All files in this directory are licensed under the [Apache 2.0 License](./LICENSE).
+
+ ## Reference
+
+ - https://arxiv.org/abs/1507.05717
+ - https://github.com/bgshih/crnn
+ - https://github.com/meijieru/crnn.pytorch
+ - https://github.com/zihaomu/deep-text-recognition-benchmark
+ - https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html
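
As a quick aside to the README above: a minimal sketch of driving the `CRNN` class from `crnn.py` (added below) directly, without `demo.py`. The model path, image path, and box coordinates here are placeholders; the box follows the bottom-left, top-left, top-right, bottom-right vertex order that `crnn.py` expects:

```python
import numpy as np
import cv2 as cv

from crnn import CRNN

# Placeholder model path -- adjust to your local checkout.
recognizer = CRNN(modelPath='text_recognition_crnn.onnx')

# Any BGR image containing a word; path is a placeholder.
image = cv.imread('sample.jpg')

# 8 values = 4 (x, y) vertices of a rotated box around the text,
# e.g. one box produced by the DB detector. Coordinates are made up.
rbbox = np.array([10, 60, 10, 10, 180, 10, 180, 60], dtype=np.float32)

print(recognizer.infer(image, rbbox))
```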
models/text_recognition_crnn/crnn.py ADDED
@@ -0,0 +1,72 @@
+ # This file is part of OpenCV Zoo project.
+ # It is subject to the license terms in the LICENSE file found in the same directory.
+ #
+ # Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+ # Third party copyrights are property of their respective owners.
+
+ import numpy as np
+ import cv2 as cv
+
+ class CRNN:
+     def __init__(self, modelPath):
+         self._model = cv.dnn.readNet(modelPath)
+         self._inputSize = [100, 32] # Fixed by the model architecture
+         # Destination vertices of the perspective transform, ordered
+         # bottom-left, top-left, top-right, bottom-right
+         self._targetVertices = np.array([
+             [0, self._inputSize[1] - 1],
+             [0, 0],
+             [self._inputSize[0] - 1, 0],
+             [self._inputSize[0] - 1, self._inputSize[1] - 1]
+         ], dtype=np.float32)
+
+     @property
+     def name(self):
+         return self.__class__.__name__
+
+     def setBackend(self, backend_id):
+         self._model.setPreferableBackend(backend_id)
+
+     def setTarget(self, target_id):
+         self._model.setPreferableTarget(target_id)
+
+     def _preprocess(self, image, rbbox):
+         # Drop the trailing confidence value if present, reshape the 8 vertex
+         # coordinates into 4 (x, y) pairs and ensure np.float32
+         vertices = rbbox[:8].reshape((4, 2)).astype(np.float32)
+
+         rotationMatrix = cv.getPerspectiveTransform(vertices, self._targetVertices)
+         cropped = cv.warpPerspective(image, rotationMatrix, self._inputSize)
+
+         cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)
+
+         # Normalize pixel values from [0, 255] to [-1, 1]
+         return cv.dnn.blobFromImage(cropped, size=self._inputSize, mean=127.5, scalefactor=1 / 127.5)
+
+     def infer(self, image, rbbox):
+         # Preprocess
+         inputBlob = self._preprocess(image, rbbox)
+
+         # Forward
+         self._model.setInput(inputBlob)
+         outputBlob = self._model.forward()
+
+         # Postprocess
+         results = self._postprocess(outputBlob)
+
+         return results
+
+     def _postprocess(self, outputBlob):
+         '''Decode characters from outputBlob using greedy CTC decoding
+         '''
+         text = ""
+         alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+         # Pick the most likely class per time step; class 0 is the CTC blank
+         for i in range(outputBlob.shape[0]):
+             c = np.argmax(outputBlob[i][0])
+             if c != 0:
+                 text += alphabet[c - 1]
+             else:
+                 text += '-'
+
+         # Adjacent duplicate letters as well as the blank ('-') must be
+         # removed to get the final output
+         char_list = []
+         for i in range(len(text)):
+             if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
+                 char_list.append(text[i])
+         return ''.join(char_list)
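
The collapse step in `_postprocess` is standard greedy CTC decoding: take the most likely class at each time step, then drop repeated characters and blanks. A tiny self-contained illustration with made-up per-step classes (class `c > 0` maps to `alphabet[c - 1]`, and 0 is the blank, as in the code above):

```python
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"

# Hypothetical per-step argmax results spelling "hh-e-ll-l-oo"
steps = [18, 18, 0, 15, 0, 22, 22, 0, 22, 0, 25, 25]

raw = ''.join(alphabet[c - 1] if c != 0 else '-' for c in steps)
decoded = []
for i, ch in enumerate(raw):
    # Keep a character only if it is not a blank and not a repeat of the
    # previous step; a blank between repeats keeps them separate ("ll").
    if ch != '-' and not (i > 0 and ch == raw[i - 1]):
        decoded.append(ch)

print(raw)               # hh-e-ll-l-oo
print(''.join(decoded))  # hello
```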
models/text_recognition_crnn/demo.py ADDED
@@ -0,0 +1,124 @@
+ # This file is part of OpenCV Zoo project.
+ # It is subject to the license terms in the LICENSE file found in the same directory.
+ #
+ # Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+ # Third party copyrights are property of their respective owners.
+
+ import sys
+ import argparse
+
+ import numpy as np
+ import cv2 as cv
+
+ from crnn import CRNN
+
+ sys.path.append('../text_detection_db')
+ from db import DB
+
+ def str2bool(v):
+     if v.lower() in ['on', 'yes', 'true', 'y', 't']:
+         return True
+     elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
+         return False
+     else:
+         raise NotImplementedError
+
+ parser = argparse.ArgumentParser(
+     description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
+ parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using the default camera.')
+ parser.add_argument('--model', '-m', type=str, default='text_recognition_crnn.onnx', help='Path to the model.')
+ parser.add_argument('--width', type=int, default=736,
+                     help='The width of the input image fed to the text detector.')
+ parser.add_argument('--height', type=int, default=736,
+                     help='The height of the input image fed to the text detector.')
+ parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
+ parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+ args = parser.parse_args()
+
+ def visualize(image, results, texts, color=(0, 255, 0), isClosed=True, thickness=2):
+     output = image.copy()
+
+     # results[0] holds the detected boxes; polylines expects integer vertices
+     pts = np.array(results[0]).astype(np.int32)
+     output = cv.polylines(output, pts, isClosed, color, thickness)
+     for box, text in zip(results[0], texts):
+         # Put the recognized text at the top-left vertex of its box
+         cv.putText(output, text, tuple(box[1].astype(np.int32)), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
+     return output
+
+ if __name__ == '__main__':
+     # Instantiate CRNN for text recognition
+     recognizer = CRNN(modelPath=args.model)
+     # Instantiate DB for text detection
+     detector = DB(modelPath='../text_detection_db/text_detection_db.onnx',
+                   inputSize=[args.width, args.height],
+                   binaryThreshold=0.3,
+                   polygonThreshold=0.5,
+                   maxCandidates=200,
+                   unclipRatio=2.0
+     )
+
+     # If input is an image
+     if args.input is not None:
+         image = cv.imread(args.input)
+         image = cv.resize(image, [args.width, args.height])
+
+         # Inference
+         results = detector.infer(image)
+         texts = []
+         for box, score in zip(results[0], results[1]):
+             texts.append(
+                 recognizer.infer(image, box.reshape(8))
+             )
+
+         # Draw results on the input image
+         image = visualize(image, results, texts)
+
+         # Save results if save is true
+         if args.save:
+             print('Results saved to result.jpg\n')
+             cv.imwrite('result.jpg', image)
+
+         # Visualize results in a new window
+         if args.vis:
+             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+             cv.imshow(args.input, image)
+             cv.waitKey(0)
+     else: # Omit input to call the default camera
+         deviceId = 0
+         cap = cv.VideoCapture(deviceId)
+
+         tm = cv.TickMeter()
+         while cv.waitKey(1) < 0:
+             hasFrame, frame = cap.read()
+             if not hasFrame:
+                 print('No frames grabbed!')
+                 break
+
+             frame = cv.resize(frame, [args.width, args.height])
+             # Inference of the text detector
+             tm.start()
+             results = detector.infer(frame)
+             tm.stop()
+             fps_detector = tm.getFPS()
+             tm.reset()
+             # Inference of the text recognizer
+             texts = []
+             tm.start()
+             for box, score in zip(results[0], results[1]):
+                 # Append the confidence score to the 8 vertex coordinates;
+                 # CRNN._preprocess drops it again
+                 result = np.hstack(
+                     (box.reshape(8), score)
+                 )
+                 texts.append(
+                     recognizer.infer(frame, result)
+                 )
+             tm.stop()
+             fps_recognizer = tm.getFPS()
+             tm.reset()
+
+             # Draw results on the input frame
+             frame = visualize(frame, results, texts)
+
+             # tm.getFPS() reports frames per second, so label it as FPS
+             cv.putText(frame, 'FPS - {}: {:.2f}'.format(detector.name, fps_detector), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
+             cv.putText(frame, 'FPS - {}: {:.2f}'.format(recognizer.name, fps_recognizer), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
+
+             # Visualize results in a new window
+             cv.imshow('{} Demo'.format(recognizer.name), frame)
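
`demo.py` measures per-stage speed with `cv.TickMeter`, which accumulates time across `start()`/`stop()` pairs until `reset()`. A minimal sketch of that pattern in isolation; the Gaussian blur on a dummy image is only a stand-in for a network forward pass:

```python
import numpy as np
import cv2 as cv

tm = cv.TickMeter()
image = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy workload input

for _ in range(10):
    tm.start()
    cv.GaussianBlur(image, (31, 31), 0)  # stand-in for detector.infer(frame)
    tm.stop()

# Time accumulates over all start()/stop() pairs until reset();
# getFPS() averages over the counted runs.
print('avg time: {:.2f} ms'.format(tm.getTimeMilli() / 10))
print('avg FPS : {:.2f}'.format(tm.getFPS()))
tm.reset()
```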