# testausB4/app.py
# Duplicated from Prevantec/platerecognition (commit 3c84e45)
import gradio as gr
import numpy as np
import cv2
from norfair import Detection, Tracker, Video
from detector.utils import detect_plates, detect_chars, imcrop, send_request, draw_text
from threading import Thread
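
# Norfair tracker tuning constants; only DISTANCE_THRESHOLD_BBOX is used below.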
DISTANCE_THRESHOLD_BBOX: float = 0.7
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10000
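

# Convert YOLOv5 detections (xyxy tensor) into norfair Detection objects,
# using the two box corners as tracked points and the confidence as score.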
def yolo_to_norfair(yolo_detections):
    norfair_detections = []
    detections_as_xyxy = yolo_detections.xyxy[0]
    for detection_as_xyxy in detections_as_xyxy:
        bbox = np.array(
            [
                [detection_as_xyxy[0].item(), detection_as_xyxy[1].item()],
                [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()],
            ]
        )
        scores = np.array(
            [detection_as_xyxy[4].item(), detection_as_xyxy[4].item()]
        )
        norfair_detections.append(
            Detection(
                points=bbox, scores=scores, label=int(detection_as_xyxy[-1].item())
            )
        )
    return norfair_detections
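

# Detect plates in a single image, draw a box and the recognized text on it,
# and return the annotated image together with the list of plate strings.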
def fn_image(foto):
    plates_text = []
    plates = detect_plates(foto)
    records = plates.pandas().xyxy[0].to_dict(orient='records')
    if records:
        for plate in records:
            xi, yi, xf, yf = int(plate['xmin']), int(plate['ymin']), int(plate['xmax']), int(plate['ymax'])
            crop = imcrop(foto, (xi, yi, xf, yf))
            if len(crop) > 0:
                cv2.rectangle(foto, (xi, yi), (xf, yf), (0, 255, 0), 2)
                text = detect_chars(crop)
                draw_text(foto, text, (xi, yi))
                plates_text.append(text)
    return foto, plates_text
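

# Process a segment of the uploaded video: detect plates frame by frame, track
# them with norfair, OCR each track once, and write an annotated output.mp4.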
def fn_video(video, initial_time, duration):
    tracker = Tracker(
        distance_function="iou_opt",
        distance_threshold=DISTANCE_THRESHOLD_BBOX,
    )
    cap = cv2.VideoCapture(video)
    fps = cap.get(cv2.CAP_PROP_FPS)
    image_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    final_video = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'VP90'), fps, image_size)
    num_frames = 0
    min_frame = int(initial_time * fps)
    max_frame = int((initial_time + duration) * fps)
    plates = {}
    while cap.isOpened():
        try:
            ret, frame = cap.read()
            if not ret:
                break
            frame_copy = frame.copy()
        except Exception as e:
            print(e)
            continue
        # Skip frames before the requested start time.
        if num_frames < min_frame:
            num_frames += 1
            continue
        yolo_detections = detect_plates(frame)
        detections = yolo_to_norfair(yolo_detections)
        tracked_objects = tracker.update(detections=detections)
        for obj in tracked_objects:
            if obj.last_detection is not None:
                bbox = obj.last_detection.points
                bbox = int(bbox[0][0]), int(bbox[0][1]), int(bbox[1][0]), int(bbox[1][1])
                if obj.id not in plates:
                    # OCR each tracked plate only once, then report it in a
                    # background thread so the frame loop is not blocked.
                    crop = imcrop(frame, bbox)
                    text = detect_chars(crop)
                    plates[obj.id] = text
                    thread = Thread(target=send_request, args=(frame_copy, text, bbox))
                    thread.start()
                cv2.rectangle(
                    frame,
                    (bbox[0], bbox[1]),
                    (bbox[2], bbox[3]),
                    (0, 255, 0),
                    2,
                )
                draw_text(frame, plates[obj.id], (bbox[0], bbox[1]))
                cv2.putText(
                    frame,
                    plates[obj.id],
                    (bbox[0], bbox[1]),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )
        final_video.write(frame)
        num_frames += 1
        if num_frames == max_frame:
            break
    cap.release()
    final_video.release()
    return 'output.mp4', list(plates.values())
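

# Gradio interfaces, one per input source (image upload, video upload, webcam).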
image_interface = gr.Interface(
    fn=fn_image,
    inputs="image",
    outputs=["image", "text"],
    title="Find plate numbers in an image",
    allow_flagging=False,
    allow_screenshot=False,
)

video_interface = gr.Interface(
    fn=fn_video,
    inputs=[
        gr.Video(type="file", label="Video"),
        gr.Slider(0, 600, value=0, label="Start time in seconds", step=1),
        gr.Slider(0, 10, value=4, label="Duration in seconds", step=1),
    ],
    outputs=["video", "text"],
    title="Find plate numbers in a video",
    allow_flagging=False,
    allow_screenshot=False,
)

# Defined but not wired into the tabbed app below.
webcam_interface = gr.Interface(
    fn_image,
    inputs=[
        gr.Image(source='webcam', streaming=True),
    ],
    outputs=gr.Image(type="file"),
    live=True,
    title="Find a plate with the camera",
    allow_flagging=False,
    allow_screenshot=False,
)

if __name__ == "__main__":
    gr.TabbedInterface(
        [image_interface, video_interface],
        ["Photos", "Videos"],
    ).launch()