opencv
/

opencv_zoo

ONNX

Model card Files Files and versions

xet

Community

Zihao Mu committed on Oct 13, 2022

Commit

b81d9fd

1 Parent(s): d30c3db

add scale factor to DB demo (#96)

Browse files

Files changed (2) hide show

models/text_detection_db/demo.py +31 -9
models/text_recognition_crnn/demo.py +32 -9

models/text_detection_db/demo.py CHANGED Viewed

@@ -73,29 +73,40 @@ if __name__ == '__main__':
     # If input is an image
     if args.input is not None:
-        image = cv.imread(args.input)
-        image = cv.resize(image, [args.width, args.height])
         # Inference
         results = model.infer(image)
         # Print results
         print('{} texts detected.'.format(len(results[0])))
         for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
             print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
         # Draw results on the input image
-        image = visualize(image, results)
         # Save results if save is true
         if args.save:
             print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', image)
         # Visualize results in a new window
         if args.vis:
             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, image)
             cv.waitKey(0)
     else: # Omit input to call default camera
         deviceId = 0
@@ -103,22 +114,33 @@ if __name__ == '__main__':
         tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
-            hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
-            frame = cv.resize(frame, [args.width, args.height])
             # Inference
             tm.start()
             results = model.infer(frame) # results is a tuple
             tm.stop()
             # Draw results on the input image
-            frame = visualize(frame, results, fps=tm.getFPS())
             # Visualize results in a new Window
-            cv.imshow('{} Demo'.format(model.name), frame)
             tm.reset()

     # If input is an image
     if args.input is not None:
+        original_image = cv.imread(args.input)
+        original_w = original_image.shape[1]
+        original_h = original_image.shape[0]
+        scaleHeight = original_h / args.height
+        scaleWidth = original_w / args.width
+        image = cv.resize(original_image, [args.width, args.height])
         # Inference
         results = model.infer(image)
+        # Scale the results bounding box
+        for i in range(len(results[0])):
+            for j in range(4):
+                box = results[0][i][j]
+                results[0][i][j][0] = box[0] * scaleWidth
+                results[0][i][j][1] = box[1] * scaleHeight
         # Print results
         print('{} texts detected.'.format(len(results[0])))
         for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
             print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
         # Draw results on the input image
+        original_image = visualize(original_image, results)
         # Save results if save is true
         if args.save:
             print('Resutls saved to result.jpg\n')
+            cv.imwrite('result.jpg', original_image)
         # Visualize results in a new window
         if args.vis:
             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, original_image)
             cv.waitKey(0)
     else: # Omit input to call default camera
         deviceId = 0
         tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
+            hasFrame, original_image = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
+            original_w = original_image.shape[1]
+            original_h = original_image.shape[0]
+            scaleHeight = original_h / args.height
+            scaleWidth = original_w / args.width
+            frame = cv.resize(original_image, [args.width, args.height])
             # Inference
             tm.start()
             results = model.infer(frame) # results is a tuple
             tm.stop()
+            # Scale the results bounding box
+            for i in range(len(results[0])):
+                for j in range(4):
+                    box = results[0][i][j]
+                    results[0][i][j][0] = box[0] * scaleWidth
+                    results[0][i][j][1] = box[1] * scaleHeight
             # Draw results on the input image
+            original_image = visualize(original_image, results, fps=tm.getFPS())
             # Visualize results in a new Window
+            cv.imshow('{} Demo'.format(model.name), original_image)
             tm.reset()

models/text_recognition_crnn/demo.py CHANGED Viewed

@@ -75,8 +75,12 @@ if __name__ == '__main__':
     # If input is an image
     if args.input is not None:
-        image = cv.imread(args.input)
-        image = cv.resize(image, [args.width, args.height])
         # Inference
         results = detector.infer(image)
@@ -86,18 +90,25 @@ if __name__ == '__main__':
                 recognizer.infer(image, box.reshape(8))
             )
         # Draw results on the input image
-        image = visualize(image, results, texts)
         # Save results if save is true
         if args.save:
             print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', image)
         # Visualize results in a new window
         if args.vis:
             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, image)
             cv.waitKey(0)
     else: # Omit input to call default camera
         deviceId = 0
@@ -105,12 +116,17 @@ if __name__ == '__main__':
         tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
-            hasFrame, frame = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
-            frame = cv.resize(frame, [args.width, args.height])
             # Inference of text detector
             tm.start()
             results = detector.infer(frame)
@@ -133,10 +149,17 @@ if __name__ == '__main__':
                 cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
                 tm.reset()
                 # Draw results on the input image
-                frame = visualize(frame, results, texts)
                 print(texts)
             # Visualize results in a new Window
-            cv.imshow('{} Demo'.format(recognizer.name), frame)

     # If input is an image
     if args.input is not None:
+        original_image = cv.imread(args.input)
+        original_w = original_image.shape[1]
+        original_h = original_image.shape[0]
+        scaleHeight = original_h / args.height
+        scaleWidth = original_w / args.width
+        image = cv.resize(original_image, [args.width, args.height])
         # Inference
         results = detector.infer(image)
                 recognizer.infer(image, box.reshape(8))
             )
+        # Scale the results bounding box
+        for i in range(len(results[0])):
+            for j in range(4):
+                box = results[0][i][j]
+                results[0][i][j][0] = box[0] * scaleWidth
+                results[0][i][j][1] = box[1] * scaleHeight
         # Draw results on the input image
+        original_image = visualize(original_image, results, texts)
         # Save results if save is true
         if args.save:
             print('Resutls saved to result.jpg\n')
+            cv.imwrite('result.jpg', original_image)
         # Visualize results in a new window
         if args.vis:
             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, original_image)
             cv.waitKey(0)
     else: # Omit input to call default camera
         deviceId = 0
         tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
+            hasFrame, original_image = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
+            original_w = original_image.shape[1]
+            original_h = original_image.shape[0]
+            scaleHeight = original_h / args.height
+            scaleWidth = original_w / args.width
+            frame = cv.resize(original_image, [args.width, args.height])
             # Inference of text detector
             tm.start()
             results = detector.infer(frame)
                 cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
                 tm.reset()
+                # Scale the results bounding box
+                for i in range(len(results[0])):
+                    for j in range(4):
+                        box = results[0][i][j]
+                        results[0][i][j][0] = box[0] * scaleWidth
+                        results[0][i][j][1] = box[1] * scaleHeight
                 # Draw results on the input image
+                original_image = visualize(original_image, results, texts)
                 print(texts)
             # Visualize results in a new Window
+            cv.imshow('{} Demo'.format(recognizer.name), original_image)