xangcastle committed on
Commit
c683b90
·
1 Parent(s): a8274b9

add sorter and improve detections

Browse files
Files changed (2) hide show
  1. app.py +65 -11
  2. detector/utils.py +2 -16
app.py CHANGED
@@ -1,6 +1,33 @@
1
  import gradio as gr
 
2
  import cv2
3
- from detector.utils import detect_plates, detect_chars
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  def fn_image(foto):
@@ -18,7 +45,10 @@ def fn_image(foto):
18
 
19
 
20
  def fn_video(video, initial_time, duration):
21
- plates_text = []
 
 
 
22
  cap = cv2.VideoCapture(video)
23
  fps = cap.get(cv2.CAP_PROP_FPS)
24
  image_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
@@ -26,6 +56,7 @@ def fn_video(video, initial_time, duration):
26
  num_frames = 0
27
  min_frame = int(initial_time * fps)
28
  max_frame = int((initial_time + duration) * fps)
 
29
  while cap.isOpened():
30
  try:
31
  ret, frame = cap.read()
@@ -34,24 +65,47 @@ def fn_video(video, initial_time, duration):
34
  except Exception as e:
35
  print(e)
36
  continue
 
 
 
37
  if num_frames < min_frame:
38
  num_frames += 1
39
  continue
40
- plates = detect_plates(frame)
41
- for plate in plates:
42
- p1, p2, crop = plate
43
- if len(crop) > 0:
44
- cv2.rectangle(frame, p1, p2, (0, 0, 255), 2)
45
- text, crop = detect_chars(crop)
46
- cv2.putText(frame, text, p1, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 5)
47
- plates_text.append(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  final_video.write(frame)
49
  num_frames += 1
50
  if num_frames == max_frame:
51
  break
52
  cap.release()
53
  final_video.release()
54
- return 'output.mp4', plates_text
55
 
56
 
57
  image_interface = gr.Interface(
 
1
  import gradio as gr
2
+ import numpy as np
3
  import cv2
4
+ from norfair import Detection, Tracker, Video
5
+ from detector.utils import detect_plates, detect_chars, imcrop
6
+
7
+ DISTANCE_THRESHOLD_BBOX: float = 0.7
8
+ DISTANCE_THRESHOLD_CENTROID: int = 30
9
+ MAX_DISTANCE: int = 10000
10
+
11
+
12
def yolo_to_norfair(yolo_detections):
    """Convert YOLOv5 detection output into a list of norfair ``Detection``s.

    Each row of ``yolo_detections.xyxy[0]`` is read as
    ``[x_min, y_min, x_max, y_max, confidence, class]``; the two bounding-box
    corners become the detection points, and each point carries the row's
    confidence as its score.
    """
    converted = []
    for row in yolo_detections.xyxy[0]:
        x_min, y_min = row[0].item(), row[1].item()
        x_max, y_max = row[2].item(), row[3].item()
        confidence = row[4].item()
        corners = np.array([[x_min, y_min], [x_max, y_max]])
        point_scores = np.array([confidence, confidence])
        converted.append(
            Detection(points=corners, scores=point_scores, label=int(row[-1].item()))
        )
    return converted
31
 
32
 
33
  def fn_image(foto):
 
45
 
46
 
47
  def fn_video(video, initial_time, duration):
48
+ tracker = Tracker(
49
+ distance_function="iou_opt",
50
+ distance_threshold=DISTANCE_THRESHOLD_BBOX,
51
+ )
52
  cap = cv2.VideoCapture(video)
53
  fps = cap.get(cv2.CAP_PROP_FPS)
54
  image_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
 
56
  num_frames = 0
57
  min_frame = int(initial_time * fps)
58
  max_frame = int((initial_time + duration) * fps)
59
+ plates = {}
60
  while cap.isOpened():
61
  try:
62
  ret, frame = cap.read()
 
65
  except Exception as e:
66
  print(e)
67
  continue
68
+ # num_frames += 1
69
+ # if num_frames % 3 != 0:
70
+ # continue
71
  if num_frames < min_frame:
72
  num_frames += 1
73
  continue
74
+ yolo_detections = detect_plates(frame)
75
+ detections = yolo_to_norfair(yolo_detections)
76
+ tracked_objects = tracker.update(detections=detections)
77
+ for obj in tracked_objects:
78
+ if obj.last_detection is not None:
79
+ bbox = obj.last_detection.points
80
+ bbox = int(bbox[0][0]), int(bbox[0][1]), int(bbox[1][0]), int(bbox[1][1])
81
+ if obj.id not in plates.keys():
82
+ crop = imcrop(frame, bbox)
83
+ text = detect_chars(crop)
84
+ plates[obj.id] = text
85
+
86
+ cv2.rectangle(
87
+ frame,
88
+ (bbox[0], bbox[1]),
89
+ (bbox[2], bbox[3]),
90
+ (0, 255, 0),
91
+ 2,
92
+ )
93
+ cv2.putText(
94
+ frame,
95
+ plates[obj.id],
96
+ (bbox[0], bbox[1]),
97
+ cv2.FONT_HERSHEY_SIMPLEX,
98
+ 1,
99
+ (0, 255, 0),
100
+ 2,
101
+ )
102
  final_video.write(frame)
103
  num_frames += 1
104
  if num_frames == max_frame:
105
  break
106
  cap.release()
107
  final_video.release()
108
+ return 'output.mp4', [plates[k] for k in plates.keys()]
109
 
110
 
111
  image_interface = gr.Interface(
detector/utils.py CHANGED
@@ -30,29 +30,15 @@ def imcrop(img, bbox):
30
 
31
 
32
  def detect_plates(img):
33
- detect = model_plates(img)
34
- records = detect.pandas().xyxy[0].to_dict(orient='records')
35
- plates = []
36
- if records:
37
- for plate in records:
38
- xi, yi, xf, yf = int(plate['xmin']), int(plate['ymin']), int(plate['xmax']), int(plate['ymax'])
39
- crop = imcrop(img, (xi, yi, xf, yf))
40
- plates.append(((xi, yi), (xf, yf), crop))
41
- return plates
42
 
43
 
44
  def detect_chars(img):
45
  img = cv2.resize(img, (640, 320))
46
  detect = model_chars(img)
47
  records = detect.pandas().xyxy[0].to_dict(orient='records')
48
- yolo = np.squeeze(detect.render())
49
  text = ''
50
  if records:
51
  records = sorted(records, key=lambda d: d['xmin'])
52
  text = ''.join([i.get('name') for i in records])
53
- return text, yolo
54
-
55
-
56
- def save_plates(img):
57
- detect = model_plates(img)
58
- detect.crop(save=True)
 
30
 
31
 
32
def detect_plates(img):
    """Run the plate-detection model on *img* and return its raw YOLO result."""
    detections = model_plates(img)
    return detections
 
 
 
 
 
 
 
 
34
 
35
 
36
def detect_chars(img):
    """Read the characters of a cropped plate image, left to right.

    The crop is resized to the character model's 640x320 input, the
    detected boxes are ordered by their ``xmin``, and the class names are
    concatenated into the plate string ('' when nothing is detected).
    """
    resized = cv2.resize(img, (640, 320))
    results = model_chars(resized)
    boxes = results.pandas().xyxy[0].to_dict(orient='records')
    if not boxes:
        return ''
    ordered = sorted(boxes, key=lambda box: box['xmin'])
    return ''.join([box.get('name') for box in ordered])