Zihao Mu
committed on
Commit
·
b81d9fd
1
Parent(s):
d30c3db
add scale factor to DB demo (#96)
Browse files
models/text_detection_db/demo.py
CHANGED
@@ -73,29 +73,40 @@ if __name__ == '__main__':
|
|
73 |
|
74 |
# If input is an image
|
75 |
if args.input is not None:
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
78 |
|
79 |
# Inference
|
80 |
results = model.infer(image)
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
# Print results
|
83 |
print('{} texts detected.'.format(len(results[0])))
|
84 |
for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
|
85 |
print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
|
86 |
|
87 |
# Draw results on the input image
|
88 |
-
|
89 |
|
90 |
# Save results if save is true
|
91 |
if args.save:
|
92 |
print('Resutls saved to result.jpg\n')
|
93 |
-
cv.imwrite('result.jpg',
|
94 |
|
95 |
# Visualize results in a new window
|
96 |
if args.vis:
|
97 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
98 |
-
cv.imshow(args.input,
|
99 |
cv.waitKey(0)
|
100 |
else: # Omit input to call default camera
|
101 |
deviceId = 0
|
@@ -103,22 +114,33 @@ if __name__ == '__main__':
|
|
103 |
|
104 |
tm = cv.TickMeter()
|
105 |
while cv.waitKey(1) < 0:
|
106 |
-
hasFrame,
|
107 |
if not hasFrame:
|
108 |
print('No frames grabbed!')
|
109 |
break
|
110 |
|
111 |
-
|
|
|
|
|
|
|
|
|
112 |
# Inference
|
113 |
tm.start()
|
114 |
results = model.infer(frame) # results is a tuple
|
115 |
tm.stop()
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
# Draw results on the input image
|
118 |
-
|
119 |
|
120 |
# Visualize results in a new Window
|
121 |
-
cv.imshow('{} Demo'.format(model.name),
|
122 |
|
123 |
tm.reset()
|
124 |
|
|
|
73 |
|
74 |
# If input is an image
|
75 |
if args.input is not None:
|
76 |
+
original_image = cv.imread(args.input)
|
77 |
+
original_w = original_image.shape[1]
|
78 |
+
original_h = original_image.shape[0]
|
79 |
+
scaleHeight = original_h / args.height
|
80 |
+
scaleWidth = original_w / args.width
|
81 |
+
image = cv.resize(original_image, [args.width, args.height])
|
82 |
|
83 |
# Inference
|
84 |
results = model.infer(image)
|
85 |
|
86 |
+
# Scale the results bounding box
|
87 |
+
for i in range(len(results[0])):
|
88 |
+
for j in range(4):
|
89 |
+
box = results[0][i][j]
|
90 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
91 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
92 |
+
|
93 |
# Print results
|
94 |
print('{} texts detected.'.format(len(results[0])))
|
95 |
for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
|
96 |
print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score))
|
97 |
|
98 |
# Draw results on the input image
|
99 |
+
original_image = visualize(original_image, results)
|
100 |
|
101 |
# Save results if save is true
|
102 |
if args.save:
|
103 |
print('Resutls saved to result.jpg\n')
|
104 |
+
cv.imwrite('result.jpg', original_image)
|
105 |
|
106 |
# Visualize results in a new window
|
107 |
if args.vis:
|
108 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
109 |
+
cv.imshow(args.input, original_image)
|
110 |
cv.waitKey(0)
|
111 |
else: # Omit input to call default camera
|
112 |
deviceId = 0
|
|
|
114 |
|
115 |
tm = cv.TickMeter()
|
116 |
while cv.waitKey(1) < 0:
|
117 |
+
hasFrame, original_image = cap.read()
|
118 |
if not hasFrame:
|
119 |
print('No frames grabbed!')
|
120 |
break
|
121 |
|
122 |
+
original_w = original_image.shape[1]
|
123 |
+
original_h = original_image.shape[0]
|
124 |
+
scaleHeight = original_h / args.height
|
125 |
+
scaleWidth = original_w / args.width
|
126 |
+
frame = cv.resize(original_image, [args.width, args.height])
|
127 |
# Inference
|
128 |
tm.start()
|
129 |
results = model.infer(frame) # results is a tuple
|
130 |
tm.stop()
|
131 |
|
132 |
+
# Scale the results bounding box
|
133 |
+
for i in range(len(results[0])):
|
134 |
+
for j in range(4):
|
135 |
+
box = results[0][i][j]
|
136 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
137 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
138 |
+
|
139 |
# Draw results on the input image
|
140 |
+
original_image = visualize(original_image, results, fps=tm.getFPS())
|
141 |
|
142 |
# Visualize results in a new Window
|
143 |
+
cv.imshow('{} Demo'.format(model.name), original_image)
|
144 |
|
145 |
tm.reset()
|
146 |
|
models/text_recognition_crnn/demo.py
CHANGED
@@ -75,8 +75,12 @@ if __name__ == '__main__':
|
|
75 |
|
76 |
# If input is an image
|
77 |
if args.input is not None:
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
80 |
|
81 |
# Inference
|
82 |
results = detector.infer(image)
|
@@ -86,18 +90,25 @@ if __name__ == '__main__':
|
|
86 |
recognizer.infer(image, box.reshape(8))
|
87 |
)
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# Draw results on the input image
|
90 |
-
|
91 |
|
92 |
# Save results if save is true
|
93 |
if args.save:
|
94 |
print('Resutls saved to result.jpg\n')
|
95 |
-
cv.imwrite('result.jpg',
|
96 |
|
97 |
# Visualize results in a new window
|
98 |
if args.vis:
|
99 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
100 |
-
cv.imshow(args.input,
|
101 |
cv.waitKey(0)
|
102 |
else: # Omit input to call default camera
|
103 |
deviceId = 0
|
@@ -105,12 +116,17 @@ if __name__ == '__main__':
|
|
105 |
|
106 |
tm = cv.TickMeter()
|
107 |
while cv.waitKey(1) < 0:
|
108 |
-
hasFrame,
|
109 |
if not hasFrame:
|
110 |
print('No frames grabbed!')
|
111 |
break
|
112 |
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
114 |
# Inference of text detector
|
115 |
tm.start()
|
116 |
results = detector.infer(frame)
|
@@ -133,10 +149,17 @@ if __name__ == '__main__':
|
|
133 |
cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
|
134 |
tm.reset()
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
# Draw results on the input image
|
137 |
-
|
138 |
print(texts)
|
139 |
|
140 |
# Visualize results in a new Window
|
141 |
-
cv.imshow('{} Demo'.format(recognizer.name),
|
142 |
|
|
|
75 |
|
76 |
# If input is an image
|
77 |
if args.input is not None:
|
78 |
+
original_image = cv.imread(args.input)
|
79 |
+
original_w = original_image.shape[1]
|
80 |
+
original_h = original_image.shape[0]
|
81 |
+
scaleHeight = original_h / args.height
|
82 |
+
scaleWidth = original_w / args.width
|
83 |
+
image = cv.resize(original_image, [args.width, args.height])
|
84 |
|
85 |
# Inference
|
86 |
results = detector.infer(image)
|
|
|
90 |
recognizer.infer(image, box.reshape(8))
|
91 |
)
|
92 |
|
93 |
+
# Scale the results bounding box
|
94 |
+
for i in range(len(results[0])):
|
95 |
+
for j in range(4):
|
96 |
+
box = results[0][i][j]
|
97 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
98 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
99 |
+
|
100 |
# Draw results on the input image
|
101 |
+
original_image = visualize(original_image, results, texts)
|
102 |
|
103 |
# Save results if save is true
|
104 |
if args.save:
|
105 |
print('Resutls saved to result.jpg\n')
|
106 |
+
cv.imwrite('result.jpg', original_image)
|
107 |
|
108 |
# Visualize results in a new window
|
109 |
if args.vis:
|
110 |
cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
|
111 |
+
cv.imshow(args.input, original_image)
|
112 |
cv.waitKey(0)
|
113 |
else: # Omit input to call default camera
|
114 |
deviceId = 0
|
|
|
116 |
|
117 |
tm = cv.TickMeter()
|
118 |
while cv.waitKey(1) < 0:
|
119 |
+
hasFrame, original_image = cap.read()
|
120 |
if not hasFrame:
|
121 |
print('No frames grabbed!')
|
122 |
break
|
123 |
|
124 |
+
original_w = original_image.shape[1]
|
125 |
+
original_h = original_image.shape[0]
|
126 |
+
scaleHeight = original_h / args.height
|
127 |
+
scaleWidth = original_w / args.width
|
128 |
+
|
129 |
+
frame = cv.resize(original_image, [args.width, args.height])
|
130 |
# Inference of text detector
|
131 |
tm.start()
|
132 |
results = detector.infer(frame)
|
|
|
149 |
cv.putText(frame, 'Latency - {}: {:.2f}'.format(recognizer.name, tm.getFPS()), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
|
150 |
tm.reset()
|
151 |
|
152 |
+
# Scale the results bounding box
|
153 |
+
for i in range(len(results[0])):
|
154 |
+
for j in range(4):
|
155 |
+
box = results[0][i][j]
|
156 |
+
results[0][i][j][0] = box[0] * scaleWidth
|
157 |
+
results[0][i][j][1] = box[1] * scaleHeight
|
158 |
+
|
159 |
# Draw results on the input image
|
160 |
+
original_image = visualize(original_image, results, texts)
|
161 |
print(texts)
|
162 |
|
163 |
# Visualize results in a new Window
|
164 |
+
cv.imshow('{} Demo'.format(recognizer.name), original_image)
|
165 |
|