Wanli committed on
Commit
43ea797
·
1 Parent(s): ac5c83c

remove text detection DB (#221)

Browse files
benchmark/README.md CHANGED
@@ -102,8 +102,6 @@ mean median min input size model
102
  26.37 33.51 21.48 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
103
  10.07 9.68 8.16 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
104
  1.19 1.30 1.07 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
105
- 80.97 80.06 73.20 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
106
- 80.73 85.47 72.06 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
107
  23.86 24.16 23.26 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
108
  23.94 23.76 23.26 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
109
  26.89 24.78 23.26 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -161,8 +159,6 @@ mean median min input size model
161
  381.72 394.15 308.62 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
162
  194.47 195.18 191.67 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
163
  5.90 5.90 5.81 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
164
- 2033.55 2454.13 1769.20 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
165
- 1896.61 1977.38 1769.20 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
166
  462.50 463.67 456.98 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
167
  462.97 464.33 456.98 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
168
  470.79 464.35 456.98 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -221,8 +217,6 @@ mean median min input size model
221
  343.35 344.56 333.41 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
222
  89.93 91.58 88.28 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
223
  5.69 5.72 5.66 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
224
- 1070.55 1072.14 1055.67 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
225
- 1071.56 1071.38 1055.67 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
226
  238.89 238.22 236.97 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
227
  238.41 240.39 236.97 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
228
  276.96 240.19 236.97 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -261,8 +255,6 @@ mean median min input size model
261
  38.16 37.33 37.10 [224, 224] MPPersonDet with ['person_detection_mediapipe_2023mar.onnx']
262
  91.65 91.98 89.90 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
263
  91.40 92.74 89.76 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
264
- 223.24 224.30 216.37 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
265
- 223.03 222.28 216.37 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
266
  112.35 111.90 109.99 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
267
  112.68 114.63 109.93 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
268
  183.96 112.72 109.93 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -296,8 +288,6 @@ mean median min input size model
296
  153.89 153.96 153.43 [224, 224] MPPersonDet with ['person_detection_mediapipe_2023mar.onnx']
297
  44.29 44.03 43.62 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
298
  91.28 92.89 89.79 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
299
- 254.78 256.13 245.60 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
300
- 254.98 255.20 245.60 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
301
  427.53 428.67 425.63 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
302
  427.79 429.28 425.63 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
303
  414.07 429.46 387.26 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -350,8 +340,6 @@ mean median min input size model
350
  333.03 346.65 322.37 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
351
  322.95 315.22 303.07 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
352
  127.16 173.93 99.77 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
353
- 975.49 977.45 952.43 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
354
- 970.16 970.83 928.66 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
355
  238.38 241.90 233.21 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
356
  238.05 236.53 232.05 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
357
  262.58 238.47 232.05 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -437,8 +425,6 @@ mean median min input size model
437
  521.46 521.66 520.28 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
438
  541.50 544.02 520.28 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
439
  134.02 136.01 132.06 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
440
- 1441.73 1442.80 1440.26 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
441
- 1436.45 1437.89 1430.58 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
442
  360.26 360.82 359.13 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
443
  361.22 361.51 359.13 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
444
  427.85 362.87 359.13 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -477,8 +463,6 @@ mean median min input size model
477
  5.17 5.26 5.09 [192, 192] MPPalmDet with ['palm_detection_mediapipe_2023feb.onnx']
478
  16.45 16.44 16.31 [224, 224] MPPersonDet with ['person_detection_mediapipe_2023mar.onnx']
479
  5.58 5.57 5.54 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
480
- 17.15 17.18 16.83 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
481
- 17.95 18.61 16.83 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
482
  ```
483
 
484
  ### Toybrick RV1126
@@ -524,8 +508,6 @@ mean median min input size model
524
  11131.81 11141.37 11080.20 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
525
  7065.00 7461.37 3748.85 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
526
  790.98 823.19 755.99 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
527
- 49331.32 49285.30 49210.67 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
528
- 49327.34 49489.22 49210.67 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
529
  4422.65 4432.92 4376.19 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
530
  4407.88 4405.92 4353.22 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
531
  3782.89 4404.01 2682.63 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -584,8 +566,6 @@ mean median min input size model
584
  146.02 145.89 139.08 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
585
  157.60 158.88 139.08 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
586
  41.26 42.74 40.08 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
587
- 384.47 401.25 360.71 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
588
- 377.91 381.15 336.30 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
589
  110.51 111.04 107.73 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
590
  110.67 111.54 107.73 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
591
  131.52 111.76 107.73 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -644,8 +624,6 @@ mean median min input size model
644
  646.25 647.89 631.03 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
645
  182.57 185.52 179.71 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
646
  9.93 9.97 9.82 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
647
- 1914.15 1913.70 1902.25 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
648
- 1920.07 1929.80 1902.25 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
649
  495.04 493.75 489.41 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
650
  493.63 491.89 489.41 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
651
  598.94 496.42 489.41 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -704,8 +682,6 @@ mean median min input size model
704
  14925.56 14926.90 14912.28 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
705
  10507.96 10944.15 6974.74 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
706
  1113.51 1124.83 1106.81 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
707
- 66015.47 65997.60 65993.81 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
708
- 66023.14 66034.99 65993.81 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
709
  6094.40 6093.77 6091.85 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
710
  6073.33 6076.77 6055.13 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
711
  5547.32 6057.15 4653.05 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
@@ -763,8 +739,6 @@ mean median min input size model
763
  7594.21 7590.75 7589.16 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
764
  4884.04 5154.38 2715.94 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
765
  548.41 550.86 546.09 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
766
- 34074.19 34077.97 34058.43 [640, 480] DB with ['text_detection_DB_IC15_resnet18_2021sep.onnx']
767
- 34073.67 34069.82 34054.29 [640, 480] DB with ['text_detection_DB_TD500_resnet18_2021sep.onnx']
768
  3031.81 3031.79 3030.41 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
769
  3031.41 3031.17 3029.99 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
770
  2638.47 3031.01 1969.10 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
102
  26.37 33.51 21.48 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
103
  10.07 9.68 8.16 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
104
  1.19 1.30 1.07 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
 
 
105
  23.86 24.16 23.26 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
106
  23.94 23.76 23.26 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
107
  26.89 24.78 23.26 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
159
  381.72 394.15 308.62 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
160
  194.47 195.18 191.67 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
161
  5.90 5.90 5.81 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
 
 
162
  462.50 463.67 456.98 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
163
  462.97 464.33 456.98 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
164
  470.79 464.35 456.98 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
217
  343.35 344.56 333.41 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
218
  89.93 91.58 88.28 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
219
  5.69 5.72 5.66 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
 
 
220
  238.89 238.22 236.97 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
221
  238.41 240.39 236.97 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
222
  276.96 240.19 236.97 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
255
  38.16 37.33 37.10 [224, 224] MPPersonDet with ['person_detection_mediapipe_2023mar.onnx']
256
  91.65 91.98 89.90 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
257
  91.40 92.74 89.76 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
258
  112.35 111.90 109.99 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
259
  112.68 114.63 109.93 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
260
  183.96 112.72 109.93 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
288
  153.89 153.96 153.43 [224, 224] MPPersonDet with ['person_detection_mediapipe_2023mar.onnx']
289
  44.29 44.03 43.62 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
290
  91.28 92.89 89.79 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
291
  427.53 428.67 425.63 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
292
  427.79 429.28 425.63 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
293
  414.07 429.46 387.26 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
340
  333.03 346.65 322.37 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
341
  322.95 315.22 303.07 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
342
  127.16 173.93 99.77 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
343
  238.38 241.90 233.21 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
344
  238.05 236.53 232.05 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
345
  262.58 238.47 232.05 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
425
  521.46 521.66 520.28 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
426
  541.50 544.02 520.28 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
427
  134.02 136.01 132.06 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
428
  360.26 360.82 359.13 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
429
  361.22 361.51 359.13 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
430
  427.85 362.87 359.13 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
463
  5.17 5.26 5.09 [192, 192] MPPalmDet with ['palm_detection_mediapipe_2023feb.onnx']
464
  16.45 16.44 16.31 [224, 224] MPPersonDet with ['person_detection_mediapipe_2023mar.onnx']
465
  5.58 5.57 5.54 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
 
 
466
  ```
467
 
468
  ### Toybrick RV1126
 
508
  11131.81 11141.37 11080.20 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
509
  7065.00 7461.37 3748.85 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
510
  790.98 823.19 755.99 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
511
  4422.65 4432.92 4376.19 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
512
  4407.88 4405.92 4353.22 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
513
  3782.89 4404.01 2682.63 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
566
  146.02 145.89 139.08 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
567
  157.60 158.88 139.08 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
568
  41.26 42.74 40.08 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
569
  110.51 111.04 107.73 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
570
  110.67 111.54 107.73 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
571
  131.52 111.76 107.73 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
624
  646.25 647.89 631.03 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
625
  182.57 185.52 179.71 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
626
  9.93 9.97 9.82 [100, 100] WeChatQRCode with ['detect_2021nov.prototxt', 'detect_2021nov.caffemodel', 'sr_2021nov.prototxt', 'sr_2021nov.caffemodel']
 
 
627
  495.04 493.75 489.41 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
628
  493.63 491.89 489.41 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
629
  598.94 496.42 489.41 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
682
  14925.56 14926.90 14912.28 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
683
  10507.96 10944.15 6974.74 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
684
  1113.51 1124.83 1106.81 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
685
  6094.40 6093.77 6091.85 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
686
  6073.33 6076.77 6055.13 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
687
  5547.32 6057.15 4653.05 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
 
739
  7594.21 7590.75 7589.16 [128, 256] YoutuReID with ['person_reid_youtu_2021nov.onnx']
740
  4884.04 5154.38 2715.94 [128, 256] YoutuReID with ['person_reid_youtu_2021nov_int8.onnx']
741
  548.41 550.86 546.09 [256, 256] MPPose with ['pose_estimation_mediapipe_2023mar.onnx']
 
 
742
  3031.81 3031.79 3030.41 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may.onnx']
743
  3031.41 3031.17 3029.99 [640, 480] PPOCRDet with ['text_detection_en_ppocrv3_2023may.onnx']
744
  2638.47 3031.01 1969.10 [640, 480] PPOCRDet with ['text_detection_cn_ppocrv3_2023may_int8.onnx']
benchmark/color_table.svg CHANGED
benchmark/config/text_detection_db.yaml DELETED
@@ -1,20 +0,0 @@
1
- Benchmark:
2
- name: "Text Detection Benchmark"
3
- type: "Detection"
4
- data:
5
- path: "data/text"
6
- files: ["1.jpg", "2.jpg", "3.jpg"]
7
- sizes: # [[w1, h1], ...], Omit to run at original scale
8
- - [640, 480]
9
- metric:
10
- warmup: 30
11
- repeat: 10
12
- backend: "default"
13
- target: "cpu"
14
-
15
- Model:
16
- name: "DB"
17
- binaryThreshold: 0.3
18
- polygonThreshold: 0.5
19
- maxCandidates: 200
20
- unclipRatio: 2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
benchmark/table_config.yaml CHANGED
@@ -59,20 +59,6 @@ Models:
59
  acceptable_time: 2000
60
  keyword: "object_detection_nanodet"
61
 
62
- - name: "DB-IC15 (EN)"
63
- task: "Text Detection"
64
- input_size: "640x480"
65
- folder: "text_detection_db"
66
- acceptable_time: 2000
67
- keyword: "text_detection_DB_IC15_resnet18"
68
-
69
- - name: "DB-TD500 (EN&CN)"
70
- task: "Text Detection"
71
- input_size: "640x480"
72
- folder: "text_detection_db"
73
- acceptable_time: 2000
74
- keyword: "text_detection_DB_TD500_resnet18"
75
-
76
  - name: "PPOCRDet-CN"
77
  task: "Text Detection"
78
  input_size: "640x480"
 
59
  acceptable_time: 2000
60
  keyword: "object_detection_nanodet"
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  - name: "PPOCRDet-CN"
63
  task: "Text Detection"
64
  input_size: "640x480"
models/__init__.py CHANGED
@@ -3,7 +3,6 @@ import glob
3
  import os
4
 
5
  from .face_detection_yunet.yunet import YuNet
6
- from .text_detection_db.db import DB
7
  from .text_recognition_crnn.crnn import CRNN
8
  from .face_recognition_sface.sface import SFace
9
  from .image_classification_ppresnet.ppresnet import PPResNet
@@ -78,7 +77,6 @@ class ModuleRegistery:
78
 
79
  MODELS = ModuleRegistery('Models')
80
  MODELS.register(YuNet)
81
- MODELS.register(DB)
82
  MODELS.register(CRNN)
83
  MODELS.register(SFace)
84
  MODELS.register(PPResNet)
 
3
  import os
4
 
5
  from .face_detection_yunet.yunet import YuNet
 
6
  from .text_recognition_crnn.crnn import CRNN
7
  from .face_recognition_sface.sface import SFace
8
  from .image_classification_ppresnet.ppresnet import PPResNet
 
77
 
78
  MODELS = ModuleRegistery('Models')
79
  MODELS.register(YuNet)
 
80
  MODELS.register(CRNN)
81
  MODELS.register(SFace)
82
  MODELS.register(PPResNet)
models/text_detection_db/CMakeLists.txt DELETED
@@ -1,29 +0,0 @@
1
- cmake_minimum_required(VERSION 3.24)
2
- set(project_name "opencv_zoo_text_detection_db")
3
-
4
- PROJECT (${project_name})
5
-
6
- set(OPENCV_VERSION "4.8.0")
7
- set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
8
- find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
9
- # Find OpenCV, you may need to set OpenCV_DIR variable
10
- # to the absolute path to the directory containing OpenCVConfig.cmake file
11
- # via the command line or GUI
12
-
13
- file(GLOB SourceFile
14
- "demo.cpp")
15
- # If the package has been found, several variables will
16
- # be set, you can find the full list with descriptions
17
- # in the OpenCVConfig.cmake file.
18
- # Print some message showing some of them
19
- message(STATUS "OpenCV library status:")
20
- message(STATUS " config: ${OpenCV_DIR}")
21
- message(STATUS " version: ${OpenCV_VERSION}")
22
- message(STATUS " libraries: ${OpenCV_LIBS}")
23
- message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
24
-
25
- # Declare the executable target built from your sources
26
- add_executable(${project_name} ${SourceFile})
27
-
28
- # Link your application with OpenCV libraries
29
- target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/text_detection_db/LICENSE DELETED
@@ -1,202 +0,0 @@
1
-
2
- Apache License
3
- Version 2.0, January 2004
4
- http://www.apache.org/licenses/
5
-
6
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
-
8
- 1. Definitions.
9
-
10
- "License" shall mean the terms and conditions for use, reproduction,
11
- and distribution as defined by Sections 1 through 9 of this document.
12
-
13
- "Licensor" shall mean the copyright owner or entity authorized by
14
- the copyright owner that is granting the License.
15
-
16
- "Legal Entity" shall mean the union of the acting entity and all
17
- other entities that control, are controlled by, or are under common
18
- control with that entity. For the purposes of this definition,
19
- "control" means (i) the power, direct or indirect, to cause the
20
- direction or management of such entity, whether by contract or
21
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
- outstanding shares, or (iii) beneficial ownership of such entity.
23
-
24
- "You" (or "Your") shall mean an individual or Legal Entity
25
- exercising permissions granted by this License.
26
-
27
- "Source" form shall mean the preferred form for making modifications,
28
- including but not limited to software source code, documentation
29
- source, and configuration files.
30
-
31
- "Object" form shall mean any form resulting from mechanical
32
- transformation or translation of a Source form, including but
33
- not limited to compiled object code, generated documentation,
34
- and conversions to other media types.
35
-
36
- "Work" shall mean the work of authorship, whether in Source or
37
- Object form, made available under the License, as indicated by a
38
- copyright notice that is included in or attached to the work
39
- (an example is provided in the Appendix below).
40
-
41
- "Derivative Works" shall mean any work, whether in Source or Object
42
- form, that is based on (or derived from) the Work and for which the
43
- editorial revisions, annotations, elaborations, or other modifications
44
- represent, as a whole, an original work of authorship. For the purposes
45
- of this License, Derivative Works shall not include works that remain
46
- separable from, or merely link (or bind by name) to the interfaces of,
47
- the Work and Derivative Works thereof.
48
-
49
- "Contribution" shall mean any work of authorship, including
50
- the original version of the Work and any modifications or additions
51
- to that Work or Derivative Works thereof, that is intentionally
52
- submitted to Licensor for inclusion in the Work by the copyright owner
53
- or by an individual or Legal Entity authorized to submit on behalf of
54
- the copyright owner. For the purposes of this definition, "submitted"
55
- means any form of electronic, verbal, or written communication sent
56
- to the Licensor or its representatives, including but not limited to
57
- communication on electronic mailing lists, source code control systems,
58
- and issue tracking systems that are managed by, or on behalf of, the
59
- Licensor for the purpose of discussing and improving the Work, but
60
- excluding communication that is conspicuously marked or otherwise
61
- designated in writing by the copyright owner as "Not a Contribution."
62
-
63
- "Contributor" shall mean Licensor and any individual or Legal Entity
64
- on behalf of whom a Contribution has been received by Licensor and
65
- subsequently incorporated within the Work.
66
-
67
- 2. Grant of Copyright License. Subject to the terms and conditions of
68
- this License, each Contributor hereby grants to You a perpetual,
69
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
- copyright license to reproduce, prepare Derivative Works of,
71
- publicly display, publicly perform, sublicense, and distribute the
72
- Work and such Derivative Works in Source or Object form.
73
-
74
- 3. Grant of Patent License. Subject to the terms and conditions of
75
- this License, each Contributor hereby grants to You a perpetual,
76
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
- (except as stated in this section) patent license to make, have made,
78
- use, offer to sell, sell, import, and otherwise transfer the Work,
79
- where such license applies only to those patent claims licensable
80
- by such Contributor that are necessarily infringed by their
81
- Contribution(s) alone or by combination of their Contribution(s)
82
- with the Work to which such Contribution(s) was submitted. If You
83
- institute patent litigation against any entity (including a
84
- cross-claim or counterclaim in a lawsuit) alleging that the Work
85
- or a Contribution incorporated within the Work constitutes direct
86
- or contributory patent infringement, then any patent licenses
87
- granted to You under this License for that Work shall terminate
88
- as of the date such litigation is filed.
89
-
90
- 4. Redistribution. You may reproduce and distribute copies of the
91
- Work or Derivative Works thereof in any medium, with or without
92
- modifications, and in Source or Object form, provided that You
93
- meet the following conditions:
94
-
95
- (a) You must give any other recipients of the Work or
96
- Derivative Works a copy of this License; and
97
-
98
- (b) You must cause any modified files to carry prominent notices
99
- stating that You changed the files; and
100
-
101
- (c) You must retain, in the Source form of any Derivative Works
102
- that You distribute, all copyright, patent, trademark, and
103
- attribution notices from the Source form of the Work,
104
- excluding those notices that do not pertain to any part of
105
- the Derivative Works; and
106
-
107
- (d) If the Work includes a "NOTICE" text file as part of its
108
- distribution, then any Derivative Works that You distribute must
109
- include a readable copy of the attribution notices contained
110
- within such NOTICE file, excluding those notices that do not
111
- pertain to any part of the Derivative Works, in at least one
112
- of the following places: within a NOTICE text file distributed
113
- as part of the Derivative Works; within the Source form or
114
- documentation, if provided along with the Derivative Works; or,
115
- within a display generated by the Derivative Works, if and
116
- wherever such third-party notices normally appear. The contents
117
- of the NOTICE file are for informational purposes only and
118
- do not modify the License. You may add Your own attribution
119
- notices within Derivative Works that You distribute, alongside
120
- or as an addendum to the NOTICE text from the Work, provided
121
- that such additional attribution notices cannot be construed
122
- as modifying the License.
123
-
124
- You may add Your own copyright statement to Your modifications and
125
- may provide additional or different license terms and conditions
126
- for use, reproduction, or distribution of Your modifications, or
127
- for any such Derivative Works as a whole, provided Your use,
128
- reproduction, and distribution of the Work otherwise complies with
129
- the conditions stated in this License.
130
-
131
- 5. Submission of Contributions. Unless You explicitly state otherwise,
132
- any Contribution intentionally submitted for inclusion in the Work
133
- by You to the Licensor shall be under the terms and conditions of
134
- this License, without any additional terms or conditions.
135
- Notwithstanding the above, nothing herein shall supersede or modify
136
- the terms of any separate license agreement you may have executed
137
- with Licensor regarding such Contributions.
138
-
139
- 6. Trademarks. This License does not grant permission to use the trade
140
- names, trademarks, service marks, or product names of the Licensor,
141
- except as required for reasonable and customary use in describing the
142
- origin of the Work and reproducing the content of the NOTICE file.
143
-
144
- 7. Disclaimer of Warranty. Unless required by applicable law or
145
- agreed to in writing, Licensor provides the Work (and each
146
- Contributor provides its Contributions) on an "AS IS" BASIS,
147
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
- implied, including, without limitation, any warranties or conditions
149
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
- PARTICULAR PURPOSE. You are solely responsible for determining the
151
- appropriateness of using or redistributing the Work and assume any
152
- risks associated with Your exercise of permissions under this License.
153
-
154
- 8. Limitation of Liability. In no event and under no legal theory,
155
- whether in tort (including negligence), contract, or otherwise,
156
- unless required by applicable law (such as deliberate and grossly
157
- negligent acts) or agreed to in writing, shall any Contributor be
158
- liable to You for damages, including any direct, indirect, special,
159
- incidental, or consequential damages of any character arising as a
160
- result of this License or out of the use or inability to use the
161
- Work (including but not limited to damages for loss of goodwill,
162
- work stoppage, computer failure or malfunction, or any and all
163
- other commercial damages or losses), even if such Contributor
164
- has been advised of the possibility of such damages.
165
-
166
- 9. Accepting Warranty or Additional Liability. While redistributing
167
- the Work or Derivative Works thereof, You may choose to offer,
168
- and charge a fee for, acceptance of support, warranty, indemnity,
169
- or other liability obligations and/or rights consistent with this
170
- License. However, in accepting such obligations, You may act only
171
- on Your own behalf and on Your sole responsibility, not on behalf
172
- of any other Contributor, and only if You agree to indemnify,
173
- defend, and hold each Contributor harmless for any liability
174
- incurred by, or claims asserted against, such Contributor by reason
175
- of your accepting any such warranty or additional liability.
176
-
177
- END OF TERMS AND CONDITIONS
178
-
179
- APPENDIX: How to apply the Apache License to your work.
180
-
181
- To apply the Apache License to your work, attach the following
182
- boilerplate notice, with the fields enclosed by brackets "[]"
183
- replaced with your own identifying information. (Don't include
184
- the brackets!) The text should be enclosed in the appropriate
185
- comment syntax for the file format. We also recommend that a
186
- file or class name and description of purpose be included on the
187
- same "printed page" as the copyright notice for easier
188
- identification within third-party archives.
189
-
190
- Copyright [yyyy] [name of copyright owner]
191
-
192
- Licensed under the Apache License, Version 2.0 (the "License");
193
- you may not use this file except in compliance with the License.
194
- You may obtain a copy of the License at
195
-
196
- http://www.apache.org/licenses/LICENSE-2.0
197
-
198
- Unless required by applicable law or agreed to in writing, software
199
- distributed under the License is distributed on an "AS IS" BASIS,
200
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
- See the License for the specific language governing permissions and
202
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/text_detection_db/README.md DELETED
@@ -1,58 +0,0 @@
1
- # DB
2
-
3
- Real-time Scene Text Detection with Differentiable Binarization
4
-
5
- Note:
6
-
7
- - Models source: [here](https://drive.google.com/drive/folders/1qzNCHfUJOS0NEUOIKn69eCtxdlNPpWbq).
8
- - `IC15` in the filename means the model is trained on [IC15 dataset](https://rrc.cvc.uab.es/?ch=4&com=introduction), which can detect English text instances only.
9
- - `TD500` in the filename means the model is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances.
10
- - Visit https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html for more information.
11
-
12
- ## Demo
13
-
14
- ### Python
15
-
16
- Run the following command to try the demo:
17
-
18
- ```shell
19
- # detect on camera input
20
- python demo.py
21
- # detect on an image
22
- python demo.py --input /path/to/image -v
23
-
24
- # get help regarding various parameters
25
- python demo.py --help
26
- ```
27
-
28
- ### C++
29
-
30
- Install latest OpenCV and CMake >= 3.24.0 to get started with:
31
-
32
- ```shell
33
- # A typical and default installation path of OpenCV is /usr/local
34
- cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
35
- cmake --build build
36
- # detect on camera input
37
- ./build/opencv_zoo_text_detection_db -m=/path/to/model
38
- # detect on an image
39
- ./build/opencv_zoo_text_detection_db -m=/path/to/model -i=/path/to/image -v
40
- # get help messages
41
- ./build/opencv_zoo_text_detection_db -h
42
- ```
43
-
44
- ### Example outputs
45
-
46
- ![mask](./example_outputs/mask.jpg)
47
-
48
- ![gsoc](./example_outputs/gsoc.jpg)
49
-
50
- ## License
51
-
52
- All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
53
-
54
- ## Reference
55
-
56
- - https://arxiv.org/abs/1911.08947
57
- - https://github.com/MhLiao/DB
58
- - https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/text_detection_db/db.py DELETED
@@ -1,55 +0,0 @@
1
- # This file is part of OpenCV Zoo project.
2
- # It is subject to the license terms in the LICENSE file found in the same directory.
3
- #
4
- # Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
5
- # Third party copyrights are property of their respective owners.
6
-
7
- import numpy as np
8
- import cv2 as cv
9
-
10
- class DB:
11
- def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
12
- self._modelPath = modelPath
13
- self._model = cv.dnn_TextDetectionModel_DB(
14
- cv.dnn.readNet(self._modelPath)
15
- )
16
-
17
- self._inputSize = tuple(inputSize) # (w, h)
18
- self._inputHeight = inputSize[0]
19
- self._inputWidth = inputSize[1]
20
- self._binaryThreshold = binaryThreshold
21
- self._polygonThreshold = polygonThreshold
22
- self._maxCandidates = maxCandidates
23
- self._unclipRatio = unclipRatio
24
- self._backendId = backendId
25
- self._targetId = targetId
26
-
27
- self._model.setPreferableBackend(self._backendId)
28
- self._model.setPreferableTarget(self._targetId)
29
-
30
- self._model.setBinaryThreshold(self._binaryThreshold)
31
- self._model.setPolygonThreshold(self._polygonThreshold)
32
- self._model.setUnclipRatio(self._unclipRatio)
33
- self._model.setMaxCandidates(self._maxCandidates)
34
-
35
- self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
36
-
37
- @property
38
- def name(self):
39
- return self.__class__.__name__
40
-
41
- def setBackendAndTarget(self, backendId, targetId):
42
- self._backendId = backendId
43
- self._targetId = targetId
44
- self._model.setPreferableBackend(self._backendId)
45
- self._model.setPreferableTarget(self._targetId)
46
-
47
- def setInputSize(self, input_size):
48
- self._inputSize = tuple(input_size)
49
- self._model.setInputParams(1.0/255.0, self._inputSize, (122.67891434, 116.66876762, 104.00698793))
50
-
51
- def infer(self, image):
52
- assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
53
- assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
54
-
55
- return self._model.detect(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/text_detection_db/demo.cpp DELETED
@@ -1,179 +0,0 @@
1
- #include <iostream>
2
-
3
- #include <opencv2/dnn.hpp>
4
- #include <opencv2/imgproc.hpp>
5
- #include <opencv2/highgui.hpp>
6
-
7
- using namespace std;
8
- using namespace cv;
9
- using namespace dnn;
10
-
11
- vector< pair<cv::dnn::Backend, cv::dnn::Target> > backendTargetPairs = {
12
- std::make_pair<cv::dnn::Backend, cv::dnn::Target>(dnn::DNN_BACKEND_OPENCV, dnn::DNN_TARGET_CPU),
13
- std::make_pair<cv::dnn::Backend, cv::dnn::Target>(dnn::DNN_BACKEND_CUDA, dnn::DNN_TARGET_CUDA),
14
- std::make_pair<cv::dnn::Backend, cv::dnn::Target>(dnn::DNN_BACKEND_CUDA, dnn::DNN_TARGET_CUDA_FP16),
15
- std::make_pair<cv::dnn::Backend, cv::dnn::Target>(dnn::DNN_BACKEND_TIMVX, dnn::DNN_TARGET_NPU),
16
- std::make_pair<cv::dnn::Backend, cv::dnn::Target>(dnn::DNN_BACKEND_CANN, dnn::DNN_TARGET_NPU)};
17
-
18
-
19
- std::string keys =
20
- "{ help h | | Print help message. }"
21
- "{ model m | text_detection_DB_IC15_resnet18_2021sep.onnx | Usage: Set model type, defaults to text_detection_DB_IC15_resnet18_2021sep.onnx }"
22
- "{ input i | | Usage: Path to input image or video file. Skip this argument to capture frames from a camera.}"
23
- "{ width | 736 | Usage: Resize input image to certain width, default = 736. It should be multiple by 32.}"
24
- "{ height | 736 | Usage: Resize input image to certain height, default = 736. It should be multiple by 32.}"
25
- "{ binary_threshold | 0.3 | Usage: Threshold of the binary map, default = 0.3.}"
26
- "{ polygon_threshold | 0.5 | Usage: Threshold of polygons, default = 0.5.}"
27
- "{ max_candidates | 200 | Usage: Set maximum number of polygon candidates, default = 200.}"
28
- "{ unclip_ratio | 2.0 | Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.}"
29
- "{ save s | true | Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.}"
30
- "{ viz v | true | Usage: Specify to open a new window to show results. Invalid in case of camera input.}"
31
- "{ backend bt | 0 | Choose one of computation backends: "
32
- "0: (default) OpenCV implementation + CPU, "
33
- "1: CUDA + GPU (CUDA), "
34
- "2: CUDA + GPU (CUDA FP16), "
35
- "3: TIM-VX + NPU, "
36
- "4: CANN + NPU}";
37
-
38
-
39
- class DB {
40
- public:
41
-
42
- DB(string modPath, Size inSize = Size(736, 736), float binThresh = 0.3,
43
- float polyThresh = 0.5, int maxCand = 200, double unRatio = 2.0,
44
- dnn::Backend bId = DNN_BACKEND_DEFAULT, dnn::Target tId = DNN_TARGET_CPU) : modelPath(modPath), inputSize(inSize), binaryThreshold(binThresh),
45
- polygonThreshold(polyThresh), maxCandidates(maxCand), unclipRatio(unRatio),
46
- backendId(bId), targetId(tId)
47
- {
48
- this->model = TextDetectionModel_DB(readNet(modelPath));
49
- this->model.setPreferableBackend(backendId);
50
- this->model.setPreferableTarget(targetId);
51
-
52
- this->model.setBinaryThreshold(binaryThreshold);
53
- this->model.setPolygonThreshold(polygonThreshold);
54
- this->model.setUnclipRatio(unclipRatio);
55
- this->model.setMaxCandidates(maxCandidates);
56
-
57
- this->model.setInputParams(1.0 / 255.0, inputSize, Scalar(122.67891434, 116.66876762, 104.00698793));
58
- }
59
- pair< vector<vector<Point>>, vector<float> > infer(Mat image) {
60
- CV_Assert(image.rows == this->inputSize.height && "height of input image != net input size ");
61
- CV_Assert(image.cols == this->inputSize.width && "width of input image != net input size ");
62
- vector<vector<Point>> pt;
63
- vector<float> confidence;
64
- this->model.detect(image, pt, confidence);
65
- return make_pair< vector<vector<Point>> &, vector< float > &>(pt, confidence);
66
- }
67
-
68
- private:
69
- string modelPath;
70
- TextDetectionModel_DB model;
71
- Size inputSize;
72
- float binaryThreshold;
73
- float polygonThreshold;
74
- int maxCandidates;
75
- double unclipRatio;
76
- dnn::Backend backendId;
77
- dnn::Target targetId;
78
-
79
- };
80
-
81
- Mat visualize(Mat image, pair< vector<vector<Point>>, vector<float> >&results, double fps=-1, Scalar boxColor=Scalar(0, 255, 0), Scalar textColor=Scalar(0, 0, 255), bool isClosed=true, int thickness=2)
82
- {
83
- Mat output;
84
- image.copyTo(output);
85
- if (fps > 0)
86
- putText(output, format("FPS: %.2f", fps), Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, textColor);
87
- polylines(output, results.first, isClosed, boxColor, thickness);
88
- return output;
89
- }
90
-
91
- int main(int argc, char** argv)
92
- {
93
- CommandLineParser parser(argc, argv, keys);
94
-
95
- parser.about("Use this program to run Real-time Scene Text Detection with Differentiable Binarization in opencv Zoo using OpenCV.");
96
- if (parser.has("help"))
97
- {
98
- parser.printMessage();
99
- return 0;
100
- }
101
-
102
- int backendTargetid = parser.get<int>("backend");
103
- String modelName = parser.get<String>("model");
104
-
105
- if (modelName.empty())
106
- {
107
- CV_Error(Error::StsError, "Model file " + modelName + " not found");
108
- }
109
-
110
- Size inpSize(parser.get<int>("width"), parser.get<int>("height"));
111
- float binThresh = parser.get<float>("binary_threshold");
112
- float polyThresh = parser.get<float>("polygon_threshold");
113
- int maxCand = parser.get<int>("max_candidates");
114
- double unRatio = parser.get<float>("unclip_ratio");
115
- bool save = parser.get<bool>("save");
116
- bool viz = parser.get<float>("viz");
117
-
118
- DB model(modelName, inpSize, binThresh, polyThresh, maxCand, unRatio, backendTargetPairs[backendTargetid].first, backendTargetPairs[backendTargetid].second);
119
-
120
- //! [Open a video file or an image file or a camera stream]
121
- VideoCapture cap;
122
- if (parser.has("input"))
123
- cap.open(parser.get<String>("input"));
124
- else
125
- cap.open(0);
126
- if (!cap.isOpened())
127
- CV_Error(Error::StsError, "Cannot opend video or file");
128
- Mat originalImage;
129
- static const std::string kWinName = modelName;
130
- while (waitKey(1) < 0)
131
- {
132
- cap >> originalImage;
133
- if (originalImage.empty())
134
- {
135
- cout << "Frame is empty" << endl;
136
- waitKey();
137
- break;
138
- }
139
- int originalW = originalImage.cols;
140
- int originalH = originalImage.rows;
141
- double scaleHeight = originalH / double(inpSize.height);
142
- double scaleWidth = originalW / double(inpSize.width);
143
- Mat image;
144
- resize(originalImage, image, inpSize);
145
-
146
- // inference
147
- TickMeter tm;
148
- tm.start();
149
- pair< vector<vector<Point>>, vector<float> > results = model.infer(image);
150
- tm.stop();
151
- auto x = results.first;
152
- // Scale the results bounding box
153
- for (auto &pts : results.first)
154
- {
155
- for (int i = 0; i < 4; i++)
156
- {
157
- pts[i].x = int(pts[i].x * scaleWidth);
158
- pts[i].y = int(pts[i].y * scaleHeight);
159
- }
160
- }
161
- originalImage = visualize(originalImage, results, tm.getFPS());
162
- tm.reset();
163
- if (parser.has("input"))
164
- {
165
- if (save)
166
- {
167
- cout << "Result image saved to result.jpg\n";
168
- imwrite("result.jpg", originalImage);
169
- }
170
- if (viz)
171
- imshow(kWinName, originalImage);
172
- }
173
- else
174
- imshow(kWinName, originalImage);
175
- }
176
- return 0;
177
- }
178
-
179
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/text_detection_db/demo.py DELETED
@@ -1,154 +0,0 @@
1
- # This file is part of OpenCV Zoo project.
2
- # It is subject to the license terms in the LICENSE file found in the same directory.
3
- #
4
- # Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
5
- # Third party copyrights are property of their respective owners.
6
-
7
- import argparse
8
-
9
- import numpy as np
10
- import cv2 as cv
11
-
12
- from db import DB
13
-
14
- # Check OpenCV version
15
- assert cv.__version__ >= "4.8.0", \
16
- "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
17
-
18
- # Valid combinations of backends and targets
19
- backend_target_pairs = [
20
- [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
21
- [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
22
- [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
23
- [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
24
- [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
25
- ]
26
-
27
- parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
28
- parser.add_argument('--input', '-i', type=str,
29
- help='Usage: Set path to the input image. Omit for using default camera.')
30
- parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx',
31
- help='Usage: Set model path, defaults to text_detection_DB_TD500_resnet18_2021sep.onnx.')
32
- parser.add_argument('--backend_target', '-bt', type=int, default=0,
33
- help='''Choose one of the backend-target pair to run this demo:
34
- {:d}: (default) OpenCV implementation + CPU,
35
- {:d}: CUDA + GPU (CUDA),
36
- {:d}: CUDA + GPU (CUDA FP16),
37
- {:d}: TIM-VX + NPU,
38
- {:d}: CANN + NPU
39
- '''.format(*[x for x in range(len(backend_target_pairs))]))
40
- parser.add_argument('--width', type=int, default=736,
41
- help='Usage: Resize input image to certain width, default = 736. It should be multiple by 32.')
42
- parser.add_argument('--height', type=int, default=736,
43
- help='Usage: Resize input image to certain height, default = 736. It should be multiple by 32.')
44
- parser.add_argument('--binary_threshold', type=float, default=0.3,
45
- help='Usage: Threshold of the binary map, default = 0.3.')
46
- parser.add_argument('--polygon_threshold', type=float, default=0.5,
47
- help='Usage: Threshold of polygons, default = 0.5.')
48
- parser.add_argument('--max_candidates', type=int, default=200,
49
- help='Usage: Set maximum number of polygon candidates, default = 200.')
50
- parser.add_argument('--unclip_ratio', type=np.float64, default=2.0,
51
- help=' Usage: The unclip ratio of the detected text region, which determines the output size, default = 2.0.')
52
- parser.add_argument('--save', '-s', action='store_true',
53
- help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
54
- parser.add_argument('--vis', '-v', action='store_true',
55
- help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
56
- args = parser.parse_args()
57
-
58
- def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
59
- output = image.copy()
60
-
61
- if fps is not None:
62
- cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
63
-
64
- pts = np.array(results[0])
65
- output = cv.polylines(output, pts, isClosed, box_color, thickness)
66
-
67
- return output
68
-
69
- if __name__ == '__main__':
70
- backend_id = backend_target_pairs[args.backend_target][0]
71
- target_id = backend_target_pairs[args.backend_target][1]
72
-
73
- # Instantiate DB
74
- model = DB(modelPath=args.model,
75
- inputSize=[args.width, args.height],
76
- binaryThreshold=args.binary_threshold,
77
- polygonThreshold=args.polygon_threshold,
78
- maxCandidates=args.max_candidates,
79
- unclipRatio=args.unclip_ratio,
80
- backendId=backend_id,
81
- targetId=target_id)
82
-
83
- # If input is an image
84
- if args.input is not None:
85
- original_image = cv.imread(args.input)
86
- original_w = original_image.shape[1]
87
- original_h = original_image.shape[0]
88
- scaleHeight = original_h / args.height
89
- scaleWidth = original_w / args.width
90
- image = cv.resize(original_image, [args.width, args.height])
91
-
92
- # Inference
93
- results = model.infer(image)
94
-
95
- # Scale the results bounding box
96
- for i in range(len(results[0])):
97
- for j in range(4):
98
- box = results[0][i][j]
99
- results[0][i][j][0] = box[0] * scaleWidth
100
- results[0][i][j][1] = box[1] * scaleHeight
101
-
102
- # Print results
103
- print('{} texts detected.'.format(len(results[0])))
104
- for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
105
- print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
106
-
107
- # Draw results on the input image
108
- original_image = visualize(original_image, results)
109
-
110
- # Save results if save is true
111
- if args.save:
112
- print('Resutls saved to result.jpg\n')
113
- cv.imwrite('result.jpg', original_image)
114
-
115
- # Visualize results in a new window
116
- if args.vis:
117
- cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
118
- cv.imshow(args.input, original_image)
119
- cv.waitKey(0)
120
- else: # Omit input to call default camera
121
- deviceId = 0
122
- cap = cv.VideoCapture(deviceId)
123
-
124
- tm = cv.TickMeter()
125
- while cv.waitKey(1) < 0:
126
- hasFrame, original_image = cap.read()
127
- if not hasFrame:
128
- print('No frames grabbed!')
129
- break
130
-
131
- original_w = original_image.shape[1]
132
- original_h = original_image.shape[0]
133
- scaleHeight = original_h / args.height
134
- scaleWidth = original_w / args.width
135
- frame = cv.resize(original_image, [args.width, args.height])
136
- # Inference
137
- tm.start()
138
- results = model.infer(frame) # results is a tuple
139
- tm.stop()
140
-
141
- # Scale the results bounding box
142
- for i in range(len(results[0])):
143
- for j in range(4):
144
- box = results[0][i][j]
145
- results[0][i][j][0] = box[0] * scaleWidth
146
- results[0][i][j][1] = box[1] * scaleHeight
147
-
148
- # Draw results on the input image
149
- original_image = visualize(original_image, results, fps=tm.getFPS())
150
-
151
- # Visualize results in a new Window
152
- cv.imshow('{} Demo'.format(model.name), original_image)
153
-
154
- tm.reset()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/text_recognition_crnn/demo.cpp CHANGED
@@ -41,10 +41,10 @@ std::string keys =
41
  "4: CANN + NPU}";
42
 
43
 
44
- class DB {
45
  public:
46
 
47
- DB(string modPath, Size inSize = Size(736, 736), float binThresh = 0.3,
48
  float polyThresh = 0.5, int maxCand = 200, double unRatio = 2.0,
49
  dnn::Backend bId = DNN_BACKEND_DEFAULT, dnn::Target tId = DNN_TARGET_CPU) : modelPath(modPath), inputSize(inSize), binaryThreshold(binThresh),
50
  polygonThreshold(polyThresh), maxCandidates(maxCand), unclipRatio(unRatio),
@@ -215,7 +215,7 @@ int main(int argc, char** argv)
215
  bool save = parser.get<bool>("save");
216
  bool viz = parser.get<float>("viz");
217
 
218
- DB detector("../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx", inpSize, binThresh, polyThresh, maxCand, unRatio, backendTargetPairs[backendTargetid].first, backendTargetPairs[backendTargetid].second);
219
  CRNN recognizer(modelPath, backendTargetPairs[backendTargetid].first, backendTargetPairs[backendTargetid].second);
220
  //! [Open a video file or an image file or a camera stream]
221
  VideoCapture cap;
@@ -232,9 +232,13 @@ int main(int argc, char** argv)
232
  cap >> originalImage;
233
  if (originalImage.empty())
234
  {
235
- cout << "Frame is empty" << endl;
236
- waitKey();
237
- break;
 
 
 
 
238
  }
239
  int originalW = originalImage.cols;
240
  int originalH = originalImage.rows;
 
41
  "4: CANN + NPU}";
42
 
43
 
44
+ class PPOCRDet {
45
  public:
46
 
47
+ PPOCRDet(string modPath, Size inSize = Size(736, 736), float binThresh = 0.3,
48
  float polyThresh = 0.5, int maxCand = 200, double unRatio = 2.0,
49
  dnn::Backend bId = DNN_BACKEND_DEFAULT, dnn::Target tId = DNN_TARGET_CPU) : modelPath(modPath), inputSize(inSize), binaryThreshold(binThresh),
50
  polygonThreshold(polyThresh), maxCandidates(maxCand), unclipRatio(unRatio),
 
215
  bool save = parser.get<bool>("save");
216
  bool viz = parser.get<float>("viz");
217
 
218
+ PPOCRDet detector("../text_detection_ppocr/text_detection_en_ppocrv3_2023may.onnx", inpSize, binThresh, polyThresh, maxCand, unRatio, backendTargetPairs[backendTargetid].first, backendTargetPairs[backendTargetid].second);
219
  CRNN recognizer(modelPath, backendTargetPairs[backendTargetid].first, backendTargetPairs[backendTargetid].second);
220
  //! [Open a video file or an image file or a camera stream]
221
  VideoCapture cap;
 
232
  cap >> originalImage;
233
  if (originalImage.empty())
234
  {
235
+ if (parser.has("input"))
236
+ {
237
+ cout << "Frame is empty" << endl;
238
+ break;
239
+ }
240
+ else
241
+ continue;
242
  }
243
  int originalW = originalImage.cols;
244
  int originalH = originalImage.rows;
models/text_recognition_crnn/demo.py CHANGED
@@ -12,8 +12,8 @@ import cv2 as cv
12
 
13
  from crnn import CRNN
14
 
15
- sys.path.append('../text_detection_db')
16
- from db import DB
17
 
18
  # Check OpenCV version
19
  assert cv.__version__ >= "4.8.0", \
@@ -65,8 +65,8 @@ if __name__ == '__main__':
65
  backend_id = backend_target_pairs[args.backend_target][0]
66
  target_id = backend_target_pairs[args.backend_target][1]
67
 
68
- # Instantiate DB for text detection
69
- detector = DB(modelPath='../text_detection_db/text_detection_DB_IC15_resnet18_2021sep.onnx',
70
  inputSize=[args.width, args.height],
71
  binaryThreshold=0.3,
72
  polygonThreshold=0.5,
 
12
 
13
  from crnn import CRNN
14
 
15
+ sys.path.append('../text_detection_ppocr')
16
+ from ppocr_det import PPOCRDet
17
 
18
  # Check OpenCV version
19
  assert cv.__version__ >= "4.8.0", \
 
65
  backend_id = backend_target_pairs[args.backend_target][0]
66
  target_id = backend_target_pairs[args.backend_target][1]
67
 
68
+ # Instantiate PPOCRDet for text detection
69
+ detector = PPOCRDet(modelPath='../text_detection_ppocr/text_detection_en_ppocrv3_2023may.onnx',
70
  inputSize=[args.width, args.height],
71
  binaryThreshold=0.3,
72
  polygonThreshold=0.5,