File size: 5,072 Bytes
3c84e45 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import gradio as gr
import numpy as np
import cv2
from norfair import Detection, Tracker, Video
from detector.utils import detect_plates, detect_chars, imcrop, send_request, draw_text
from threading import Thread
# Distance threshold handed to the norfair ``Tracker`` in ``fn_video``, where it
# is paired with the "iou_opt" distance function.
DISTANCE_THRESHOLD_BBOX: float = 0.7

# NOTE(review): the two constants below are not referenced anywhere in the
# visible part of this file; presumably they belong to a centroid-based
# tracking mode -- confirm before removing.
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10_000
def yolo_to_norfair(yolo_detections):
    """Turn the detections of the first image into norfair ``Detection`` objects.

    Only ``yolo_detections.xyxy[0]`` is read. For every row in it, columns
    0-3 become a 2x2 array of two (x, y) points, column 4 is duplicated so
    that each point carries the same score, and the last column is cast to
    ``int`` and used as the label.

    NOTE(review): the rows are presumably laid out as
    ``x1, y1, x2, y2, confidence, class`` -- confirm against the detector.

    Returns:
        A list with one ``Detection`` per row, in the original row order.
    """
    converted = []
    for row in yolo_detections.xyxy[0]:
        first_corner = [row[0].item(), row[1].item()]
        second_corner = [row[2].item(), row[3].item()]
        confidence = row[4].item()
        class_id = int(row[-1].item())
        converted.append(
            Detection(
                points=np.array([first_corner, second_corner]),
                # One score per point; both points share the row's score.
                scores=np.array([confidence, confidence]),
                label=class_id,
            )
        )
    return converted
def fn_image(foto):
    """Find plates in a single image, annotate it and read each plate's text.

    ``foto`` is drawn on in place: every detection whose crop is non-empty
    gets a green rectangle and its recognised text rendered at the box's
    top-left corner.

    Args:
        foto: Image handed to ``detect_plates``, ``imcrop``, ``cv2.rectangle``
            and ``draw_text``.

    Returns:
        A ``(foto, texts)`` tuple: the same (now annotated) image object and
        the list of texts returned by ``detect_chars``, in detection order.
    """
    rows = detect_plates(foto).pandas().xyxy[0].to_dict(orient='records')
    found_texts = []
    # An empty ``rows`` list simply skips the loop.
    for row in rows:
        left, top = int(row['xmin']), int(row['ymin'])
        right, bottom = int(row['xmax']), int(row['ymax'])
        region = imcrop(foto, (left, top, right, bottom))
        if len(region) == 0:
            # Nothing was cropped for this box; leave the image untouched.
            continue
        cv2.rectangle(foto, (left, top), (right, bottom), (0, 255, 0), 2)
        label = detect_chars(region)
        draw_text(foto, label, (left, top))
        found_texts.append(label)
    return foto, found_texts
def fn_video(video, initial_time, duration):
    """Detect, track and read plates inside a time window of a video.

    Frames before ``initial_time`` are read and discarded. Every frame in the
    window ``[initial_time, initial_time + duration)`` is passed through
    ``detect_plates``, the detections are fed to a norfair ``Tracker``, and
    each tracked object is drawn on the frame, which is then appended to
    ``output.mp4``. The first time a tracker id is seen, its crop is read with
    ``detect_chars`` and ``send_request`` is started on a background thread
    with an un-annotated copy of the frame.

    Args:
        video: Source handed to ``cv2.VideoCapture``.
        initial_time: Start of the window, in seconds.
        duration: Length of the window, in seconds. ``0`` processes no frames.

    Returns:
        ``('output.mp4', texts)`` where ``texts`` holds one recognised text per
        tracked object, in the order the objects were first seen.
    """
    tracker = Tracker(
        distance_function="iou_opt",
        distance_threshold=DISTANCE_THRESHOLD_BBOX,
    )
    cap = cv2.VideoCapture(video)
    fps = cap.get(cv2.CAP_PROP_FPS)
    image_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    final_video = cv2.VideoWriter(
        'output.mp4', cv2.VideoWriter_fourcc(*'VP90'), fps, image_size
    )
    min_frame = int(initial_time * fps)
    max_frame = int((initial_time + duration) * fps)
    plates = {}  # tracker object id -> recognised plate text
    num_frames = 0
    try:
        # Bounding the loop by ``max_frame`` here (instead of an ``==`` test
        # after the increment) also stops correctly when ``duration`` is 0.
        while cap.isOpened() and num_frames < max_frame:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure. This must be checked before
                # the frame is touched: previously the ``None`` frame raised,
                # the handler did ``continue`` and the loop never terminated.
                break
            if num_frames < min_frame:
                num_frames += 1
                continue

            # The host (numpy) frame is used throughout, as in ``fn_image``:
            # cv2.rectangle, cv2.putText and VideoWriter.write do not take a
            # cv2.cuda_GpuMat.
            # Clean copy for send_request, taken before anything is drawn.
            frame_copy = frame.copy()

            detections = yolo_to_norfair(detect_plates(frame))
            tracked_objects = tracker.update(detections=detections)
            for obj in tracked_objects:
                if obj.last_detection is None:
                    continue
                points = obj.last_detection.points
                bbox = (
                    int(points[0][0]),
                    int(points[0][1]),
                    int(points[1][0]),
                    int(points[1][1]),
                )
                if obj.id not in plates:
                    # Read the plate only once per tracked object.
                    text = detect_chars(imcrop(frame, bbox))
                    plates[obj.id] = text
                    # Fire-and-forget so the request does not stall the loop.
                    Thread(
                        target=send_request, args=(frame_copy, text, bbox)
                    ).start()
                top_left = (bbox[0], bbox[1])
                cv2.rectangle(
                    frame, top_left, (bbox[2], bbox[3]), (0, 255, 0), 2
                )
                draw_text(frame, plates[obj.id], top_left)
                cv2.putText(
                    frame,
                    plates[obj.id],
                    top_left,
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )
            final_video.write(frame)
            num_frames += 1
    finally:
        # Release the capture and finalise the output file even on errors.
        cap.release()
        final_video.release()
    return 'output.mp4', list(plates.values())
# Tab for still images: fn_image's (annotated image, plate texts) result is
# shown as an image plus a text output.
image_interface = gr.Interface(
    title="Buscar números de placa en una imagen",
    fn=fn_image,
    inputs="image",
    outputs=["image", "text"],
    allow_screenshot=False,
    allow_flagging=False,
)
# Tab for videos: the two sliders supply fn_video's ``initial_time`` and
# ``duration`` arguments (both in seconds), in that order.
video_interface = gr.Interface(
    title="Buscar números de placa en un video",
    fn=fn_video,
    inputs=[
        gr.Video(label="Video", type="file"),
        gr.Slider(0, 600, label="Tiempo inicial en segundos", step=1, value=0),
        gr.Slider(0, 10, label="Duración en segundos", step=1, value=4),
    ],
    # fn_video returns the output video path and the list of plate texts.
    outputs=["video", "text"],
    allow_screenshot=False,
    allow_flagging=False,
)
# Live webcam tab: each streamed frame is run through fn_image.
webcam_interface = gr.Interface(
    fn_image,
    inputs=[
        gr.Image(source='webcam', streaming=True),
    ],
    # fn_image returns two values (annotated image, plate texts), so two
    # output components are declared; a single image output did not match.
    outputs=[gr.Image(type="file"), "text"],
    live=True,
    title="Buscar placa con la cámara",
    allow_flagging=False,
    allow_screenshot=False,
)
if __name__ == "__main__":
    # Only the photo and video tabs are served; the webcam interface is
    # defined in this file but is not part of the tab list.
    app = gr.TabbedInterface(
        [image_interface, video_interface],
        ["Fotos", "Videos"],
    )
    app.launch()
|