Zihao Mu committed
Commit b81d9fd · 1 Parent(s): d30c3db

add scale factor to DB demo (#96)

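The change below resizes the input to the model's expected size, runs inference, and then multiplies the x/y coordinates of every detected box corner by the ratio between the original image size and the resized input size, so the boxes land correctly on the full-resolution image instead of the resized one. A minimal sketch of that mapping (the helper name rescale_boxes, the (N, 4, 2) box layout, and the example sizes are illustrative assumptions, not code from the demos):

import numpy as np

def rescale_boxes(boxes, model_size, original_size):
    # Map quadrilateral corners detected on the resized model input back to
    # the original image. boxes: array-like of shape (N, 4, 2) holding (x, y)
    # corners; model_size and original_size are (width, height) pairs.
    scale_w = original_size[0] / model_size[0]
    scale_h = original_size[1] / model_size[1]
    boxes = np.asarray(boxes, dtype=np.float32).copy()
    boxes[..., 0] *= scale_w  # x coordinates
    boxes[..., 1] *= scale_h  # y coordinates
    return boxes

# A 640x480 model input mapped back to a 1280x720 frame: x doubles, y scales by 1.5.
quad = [[[100, 200], [300, 200], [300, 260], [100, 260]]]
print(rescale_boxes(quad, model_size=(640, 480), original_size=(1280, 720)))

In the demos themselves the same scaling is applied in place on results[0], corner by corner, which avoids assuming anything about the container type returned by the model.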
models/text_detection_db/demo.py CHANGED
@@ -73,29 +73,40 @@ if __name__ == '__main__':
 
     # If input is an image
     if args.input is not None:
-        image = cv.imread(args.input)
-        image = cv.resize(image, [args.width, args.height])
+        original_image = cv.imread(args.input)
+        original_w = original_image.shape[1]
+        original_h = original_image.shape[0]
+        scaleHeight = original_h / args.height
+        scaleWidth = original_w / args.width
+        image = cv.resize(original_image, [args.width, args.height])
 
         # Inference
         results = model.infer(image)
 
+        # Scale the results bounding box
+        for i in range(len(results[0])):
+            for j in range(4):
+                box = results[0][i][j]
+                results[0][i][j][0] = box[0] * scaleWidth
+                results[0][i][j][1] = box[1] * scaleHeight
+
         # Print results
         print('{} texts detected.'.format(len(results[0])))
         for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
             print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
 
         # Draw results on the input image
-        image = visualize(image, results)
+        original_image = visualize(original_image, results)
 
         # Save results if save is true
         if args.save:
             print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', image)
+            cv.imwrite('result.jpg', original_image)
 
         # Visualize results in a new window
         if args.vis:
             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, image)
+            cv.imshow(args.input, original_image)
             cv.waitKey(0)
     else: # Omit input to call default camera
         deviceId = 0
@@ -103,22 +114,33 @@ if __name__ == '__main__':
 
         tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
-            hasFrame, frame = cap.read()
+            hasFrame, original_image = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
 
-            frame = cv.resize(frame, [args.width, args.height])
+            original_w = original_image.shape[1]
+            original_h = original_image.shape[0]
+            scaleHeight = original_h / args.height
+            scaleWidth = original_w / args.width
+            frame = cv.resize(original_image, [args.width, args.height])
             # Inference
             tm.start()
             results = model.infer(frame) # results is a tuple
             tm.stop()
 
+            # Scale the results bounding box
+            for i in range(len(results[0])):
+                for j in range(4):
+                    box = results[0][i][j]
+                    results[0][i][j][0] = box[0] * scaleWidth
+                    results[0][i][j][1] = box[1] * scaleHeight
+
             # Draw results on the input image
-            frame = visualize(frame, results, fps=tm.getFPS())
+            original_image = visualize(original_image, results, fps=tm.getFPS())
 
             # Visualize results in a new Window
-            cv.imshow('{} Demo'.format(model.name), frame)
+            cv.imshow('{} Demo'.format(model.name), original_image)
 
             tm.reset()
 
models/text_recognition_crnn/demo.py CHANGED
@@ -75,8 +75,12 @@ if __name__ == '__main__':
 
     # If input is an image
     if args.input is not None:
-        image = cv.imread(args.input)
-        image = cv.resize(image, [args.width, args.height])
+        original_image = cv.imread(args.input)
+        original_w = original_image.shape[1]
+        original_h = original_image.shape[0]
+        scaleHeight = original_h / args.height
+        scaleWidth = original_w / args.width
+        image = cv.resize(original_image, [args.width, args.height])
 
         # Inference
         results = detector.infer(image)
@@ -86,18 +90,25 @@ if __name__ == '__main__':
                 recognizer.infer(image, box.reshape(8))
             )
 
+        # Scale the results bounding box
+        for i in range(len(results[0])):
+            for j in range(4):
+                box = results[0][i][j]
+                results[0][i][j][0] = box[0] * scaleWidth
+                results[0][i][j][1] = box[1] * scaleHeight
+
         # Draw results on the input image
-        image = visualize(image, results, texts)
+        original_image = visualize(original_image, results, texts)
 
         # Save results if save is true
        if args.save:
             print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', image)
+            cv.imwrite('result.jpg', original_image)
 
         # Visualize results in a new window
         if args.vis:
             cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, image)
+            cv.imshow(args.input, original_image)
             cv.waitKey(0)
     else: # Omit input to call default camera
         deviceId = 0
@@ -105,12 +116,17 @@ if __name__ == '__main__':
 
         tm = cv.TickMeter()
         while cv.waitKey(1) < 0:
-            hasFrame, frame = cap.read()
+            hasFrame, original_image = cap.read()
             if not hasFrame:
                 print('No frames grabbed!')
                 break
 
-            frame = cv.resize(frame, [args.width, args.height])
+            original_w = original_image.shape[1]
+            original_h = original_image.shape[0]
+            scaleHeight = original_h / args.height
+            scaleWidth = original_w / args.width
+
+            frame = cv.resize(original_image, [args.width, args.height])
             # Inference of text detector
             tm.start()
             results = detector.infer(frame)
@@ -133,10 +149,17 @@ if __name__ == '__main__':
             cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
             tm.reset()
 
+            # Scale the results bounding box
+            for i in range(len(results[0])):
+                for j in range(4):
+                    box = results[0][i][j]
+                    results[0][i][j][0] = box[0] * scaleWidth
+                    results[0][i][j][1] = box[1] * scaleHeight
+
             # Draw results on the input image
-            frame = visualize(frame, results, texts)
+            original_image = visualize(original_image, results, texts)
             print(texts)
 
             # Visualize results in a new Window
-            cv.imshow('{} Demo'.format(recognizer.name), frame)
+            cv.imshow('{} Demo'.format(recognizer.name), original_image)
 
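The rescaling direction is original size divided by model input size, so coordinates grow whenever the original frame is larger than the resized input. A quick standalone check of that arithmetic (the 1280x720 capture size and 640x480 model input size below are made up for illustration, not the demos' defaults):

original_w, original_h = 1280, 720    # example capture size
width, height = 640, 480              # example model input size
scaleWidth = original_w / width       # 2.0
scaleHeight = original_h / height     # 1.5
x, y = 100, 200                       # a corner detected on the resized image
print(x * scaleWidth, y * scaleHeight)  # 200.0 300.0 on the original image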