Add options for demo scripts to select backend & targets (#43)
Browse files

* add options for selecting backend & targets
* add eol
crnn.py
CHANGED
@@ -8,10 +8,17 @@ import numpy as np
|
|
8 |
import cv2 as cv
|
9 |
|
10 |
class CRNN:
|
11 |
-
def __init__(self, modelPath, charsetPath):
|
12 |
self._model_path = modelPath
|
|
|
|
|
|
|
|
|
13 |
self._model = cv.dnn.readNet(self._model_path)
|
14 |
-
self.
|
|
|
|
|
|
|
15 |
self._inputSize = [100, 32] # Fixed
|
16 |
self._targetVertices = np.array([
|
17 |
[0, self._inputSize[1] - 1],
|
@@ -33,10 +40,12 @@ class CRNN:
|
|
33 |
return charset
|
34 |
|
35 |
def setBackend(self, backend_id):
|
36 |
-
self.
|
|
|
37 |
|
38 |
def setTarget(self, target_id):
|
39 |
-
self.
|
|
|
40 |
|
41 |
def _preprocess(self, image, rbbox):
|
42 |
# Remove conf, reshape and ensure all is np.float32
|
@@ -81,4 +90,5 @@ class CRNN:
|
|
81 |
for i in range(len(text)):
|
82 |
if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
|
83 |
char_list.append(text[i])
|
84 |
-
return ''.join(char_list)
|
|
|
|
8 |
import cv2 as cv
|
9 |
|
10 |
class CRNN:
|
11 |
+
def __init__(self, modelPath, charsetPath, backendId=0, targetId=0):
|
12 |
self._model_path = modelPath
|
13 |
+
self._charsetPath = charsetPath
|
14 |
+
self._backendId = backendId
|
15 |
+
self._targetId = targetId
|
16 |
+
|
17 |
self._model = cv.dnn.readNet(self._model_path)
|
18 |
+
self._model.setPreferableBackend(self._backendId)
|
19 |
+
self._model.setPreferableTarget(self._targetId)
|
20 |
+
|
21 |
+
self._charset = self._load_charset(self._charsetPath)
|
22 |
self._inputSize = [100, 32] # Fixed
|
23 |
self._targetVertices = np.array([
|
24 |
[0, self._inputSize[1] - 1],
|
|
|
40 |
return charset
|
41 |
|
42 |
def setBackend(self, backend_id):
|
43 |
+
self._backendId = backend_id
|
44 |
+
self._model.setPreferableBackend(self._backendId)
|
45 |
|
46 |
def setTarget(self, target_id):
|
47 |
+
self._targetId = target_id
|
48 |
+
self._model.setPreferableTarget(self._targetId)
|
49 |
|
50 |
def _preprocess(self, image, rbbox):
|
51 |
# Remove conf, reshape and ensure all is np.float32
|
|
|
90 |
for i in range(len(text)):
|
91 |
if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
|
92 |
char_list.append(text[i])
|
93 |
+
return ''.join(char_list)
|
94 |
+
|
demo.py
CHANGED
@@ -23,10 +23,24 @@ def str2bool(v):
|
|
23 |
else:
|
24 |
raise NotImplementedError
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
parser = argparse.ArgumentParser(
|
27 |
description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
|
28 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
29 |
parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
|
|
|
|
|
30 |
parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
|
31 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
32 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
@@ -50,7 +64,9 @@ if __name__ == '__main__':
|
|
50 |
binaryThreshold=0.3,
|
51 |
polygonThreshold=0.5,
|
52 |
maxCandidates=200,
|
53 |
-
unclipRatio=2.0
|
|
|
|
|
54 |
)
|
55 |
|
56 |
# If input is an image
|
@@ -118,4 +134,5 @@ if __name__ == '__main__':
|
|
118 |
print(texts)
|
119 |
|
120 |
# Visualize results in a new Window
|
121 |
-
cv.imshow('{} Demo'.format(recognizer.name), frame)
|
|
|
|
23 |
else:
|
24 |
raise NotImplementedError
|
25 |
|
26 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
27 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
28 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
29 |
+
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
30 |
+
try:
|
31 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
32 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
33 |
+
help_msg_backends += "; {:d}: TIMVX"
|
34 |
+
help_msg_targets += "; {:d}: NPU"
|
35 |
+
except:
|
36 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
37 |
+
|
38 |
parser = argparse.ArgumentParser(
|
39 |
description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
|
40 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
41 |
parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
|
42 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
43 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
44 |
parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
|
45 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
46 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
|
|
64 |
binaryThreshold=0.3,
|
65 |
polygonThreshold=0.5,
|
66 |
maxCandidates=200,
|
67 |
+
unclipRatio=2.0,
|
68 |
+
backendId=args.backend,
|
69 |
+
targetId=args.target
|
70 |
)
|
71 |
|
72 |
# If input is an image
|
|
|
134 |
print(texts)
|
135 |
|
136 |
# Visualize results in a new Window
|
137 |
+
cv.imshow('{} Demo'.format(recognizer.name), frame)
|
138 |
+
|