"""Gradio app that detects vehicle license plates with YOLO, reads their
characters, and tracks them across video frames with Norfair."""

import gradio as gr
import numpy as np
import cv2
from norfair import Detection, Tracker, Video
from detector.utils import detect_plates, detect_chars, imcrop, send_request, draw_text
from threading import Thread

DISTANCE_THRESHOLD_BBOX: float = 0.7
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10000


def yolo_to_norfair(yolo_detections):
    """Convert YOLO xyxy detections into Norfair Detection objects."""
    norfair_detections = []
    detections_as_xyxy = yolo_detections.xyxy[0]
    for detection_as_xyxy in detections_as_xyxy:
        # Two-point bounding box: top-left and bottom-right corners.
        bbox = np.array(
            [
                [detection_as_xyxy[0].item(), detection_as_xyxy[1].item()],
                [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()],
            ]
        )
        # Norfair expects one score per point, so the confidence is repeated.
        scores = np.array(
            [detection_as_xyxy[4].item(), detection_as_xyxy[4].item()]
        )
        norfair_detections.append(
            Detection(
                points=bbox, scores=scores, label=int(detection_as_xyxy[-1].item())
            )
        )
    return norfair_detections


def fn_image(foto):
    """Detect plates in a single image, draw boxes, and return the recognized text."""
    plates_text = []
    plates = detect_plates(foto)
    records = plates.pandas().xyxy[0].to_dict(orient='records')
    for plate in records:
        xi, yi = int(plate['xmin']), int(plate['ymin'])
        xf, yf = int(plate['xmax']), int(plate['ymax'])
        crop = imcrop(foto, (xi, yi, xf, yf))
        if len(crop) > 0:
            cv2.rectangle(foto, (xi, yi), (xf, yf), (0, 255, 0), 2)
            text = detect_chars(crop)
            draw_text(foto, text, (xi, yi))
            plates_text.append(text)
    return foto, plates_text


def fn_video(video, initial_time, duration):
    """Detect, track, and read plates in a clip of the uploaded video."""
    tracker = Tracker(
        distance_function="iou_opt",
        distance_threshold=DISTANCE_THRESHOLD_BBOX,
    )
    cap = cv2.VideoCapture(video)
    fps = cap.get(cv2.CAP_PROP_FPS)
    image_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    # mp4v is an MPEG-4 codec OpenCV can write into an .mp4 container.
    final_video = cv2.VideoWriter(
        'output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, image_size
    )

    num_frames = 0
    min_frame = int(initial_time * fps)
    max_frame = int((initial_time + duration) * fps)
    plates = {}

    while cap.isOpened():
        try:
            ret, frame = cap.read()
            if not ret:
                break
            frame_copy = frame.copy()
        except Exception as e:
            print(e)
            break

        # Skip frames before the requested start time.
        if num_frames < min_frame:
            num_frames += 1
            continue

        yolo_detections = detect_plates(frame)
        detections = yolo_to_norfair(yolo_detections)
        tracked_objects = tracker.update(detections=detections)
        for obj in tracked_objects:
            if obj.last_detection is None:
                continue
            bbox = obj.last_detection.points
            bbox = int(bbox[0][0]), int(bbox[0][1]), int(bbox[1][0]), int(bbox[1][1])
            # Run OCR only once per tracked plate and report it in the background.
            if obj.id not in plates:
                crop = imcrop(frame, bbox)
                text = detect_chars(crop)
                plates[obj.id] = text
                thread = Thread(target=send_request, args=(frame_copy, text, bbox))
                thread.start()
            cv2.rectangle(
                frame,
                (bbox[0], bbox[1]),
                (bbox[2], bbox[3]),
                (0, 255, 0),
                2,
            )
            draw_text(frame, plates[obj.id], (bbox[0], bbox[1]))

        final_video.write(frame)
        num_frames += 1
        if num_frames == max_frame:
            break

    cap.release()
    final_video.release()
    return 'output.mp4', list(plates.values())


image_interface = gr.Interface(
    fn=fn_image,
    inputs="image",
    outputs=["image", "text"],
    title="Find license plate numbers in an image",
    allow_flagging=False,
    allow_screenshot=False,
)

video_interface = gr.Interface(
    fn=fn_video,
    inputs=[
        gr.Video(type="file", label="Video"),
        gr.Slider(0, 600, value=0, label="Start time in seconds", step=1),
        gr.Slider(0, 10, value=4, label="Duration in seconds", step=1),
    ],
    outputs=["video", "text"],
    title="Find license plate numbers in a video",
    allow_flagging=False,
    allow_screenshot=False,
)

webcam_interface = gr.Interface(
    fn_image,
    inputs=[
        gr.Image(source='webcam', streaming=True),
    ],
    outputs=["image", "text"],
    live=True,
    title="Find license plates with the webcam",
    allow_flagging=False,
    allow_screenshot=False,
)

if __name__ == "__main__":
    gr.TabbedInterface(
        [image_interface, video_interface],
        ["Photos", "Videos"],
    ).launch()