"""Gradio app that detects vehicle license plates with YOLO, reads their
characters, and tracks them across video frames with Norfair."""

import gradio as gr
import numpy as np
import cv2
from norfair import Detection, Tracker, Video
from detector.utils import detect_plates, detect_chars, imcrop, send_request, draw_text
from threading import Thread

DISTANCE_THRESHOLD_BBOX: float = 0.7
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10000


def yolo_to_norfair(yolo_detections):
    """Convert YOLO xyxy detections into Norfair Detection objects."""
    norfair_detections = []
    detections_as_xyxy = yolo_detections.xyxy[0]
    for detection_as_xyxy in detections_as_xyxy:
        # Two-point bounding box: top-left and bottom-right corners.
        bbox = np.array(
            [
                [detection_as_xyxy[0].item(), detection_as_xyxy[1].item()],
                [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()],
            ]
        )
        # Norfair expects one score per point, so the confidence is repeated.
        scores = np.array(
            [detection_as_xyxy[4].item(), detection_as_xyxy[4].item()]
        )
        norfair_detections.append(
            Detection(
                points=bbox, scores=scores, label=int(detection_as_xyxy[-1].item())
            )
        )
    return norfair_detections


def fn_image(foto):
    """Detect plates in a single image, draw boxes, and return the recognized text."""
    plates_text = []
    plates = detect_plates(foto)
    records = plates.pandas().xyxy[0].to_dict(orient='records')
    for plate in records:
        xi, yi = int(plate['xmin']), int(plate['ymin'])
        xf, yf = int(plate['xmax']), int(plate['ymax'])
        crop = imcrop(foto, (xi, yi, xf, yf))
        if len(crop) > 0:
            cv2.rectangle(foto, (xi, yi), (xf, yf), (0, 255, 0), 2)
            text = detect_chars(crop)
            draw_text(foto, text, (xi, yi))
            plates_text.append(text)
    return foto, plates_text


def fn_video(video, initial_time, duration):
    """Detect, track, and read plates in a clip of the uploaded video."""
    tracker = Tracker(
        distance_function="iou_opt",
        distance_threshold=DISTANCE_THRESHOLD_BBOX,
    )
    cap = cv2.VideoCapture(video)
    fps = cap.get(cv2.CAP_PROP_FPS)
    image_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    # mp4v is an MPEG-4 codec OpenCV can write into an .mp4 container.
    final_video = cv2.VideoWriter(
        'output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, image_size
    )

    num_frames = 0
    min_frame = int(initial_time * fps)
    max_frame = int((initial_time + duration) * fps)
    plates = {}

    while cap.isOpened():
        try:
            ret, frame = cap.read()
            if not ret:
                break
            frame_copy = frame.copy()
        except Exception as e:
            print(e)
            break

        # Skip frames before the requested start time.
        if num_frames < min_frame:
            num_frames += 1
            continue

        yolo_detections = detect_plates(frame)
        detections = yolo_to_norfair(yolo_detections)
        tracked_objects = tracker.update(detections=detections)
        for obj in tracked_objects:
            if obj.last_detection is None:
                continue
            bbox = obj.last_detection.points
            bbox = int(bbox[0][0]), int(bbox[0][1]), int(bbox[1][0]), int(bbox[1][1])
            # Run OCR only once per tracked plate and report it in the background.
            if obj.id not in plates:
                crop = imcrop(frame, bbox)
                text = detect_chars(crop)
                plates[obj.id] = text
                thread = Thread(target=send_request, args=(frame_copy, text, bbox))
                thread.start()
            cv2.rectangle(
                frame,
                (bbox[0], bbox[1]),
                (bbox[2], bbox[3]),
                (0, 255, 0),
                2,
            )
            draw_text(frame, plates[obj.id], (bbox[0], bbox[1]))

        final_video.write(frame)
        num_frames += 1
        if num_frames == max_frame:
            break

    cap.release()
    final_video.release()
    return 'output.mp4', list(plates.values())


image_interface = gr.Interface(
    fn=fn_image,
    inputs="image",
    outputs=["image", "text"],
    title="Find license plate numbers in an image",
    allow_flagging=False,
    allow_screenshot=False,
)

video_interface = gr.Interface(
    fn=fn_video,
    inputs=[
        gr.Video(type="file", label="Video"),
        gr.Slider(0, 600, value=0, label="Start time in seconds", step=1),
        gr.Slider(0, 10, value=4, label="Duration in seconds", step=1),
    ],
    outputs=["video", "text"],
    title="Find license plate numbers in a video",
    allow_flagging=False,
    allow_screenshot=False,
)

webcam_interface = gr.Interface(
    fn_image,
    inputs=[
        gr.Image(source='webcam', streaming=True),
    ],
    outputs=["image", "text"],
    live=True,
    title="Find license plates with the webcam",
    allow_flagging=False,
    allow_screenshot=False,
)

if __name__ == "__main__":
    gr.TabbedInterface(
        [image_interface, video_interface],
        ["Photos", "Videos"],
    ).launch()