import cv2
import torch
import numpy as np
import gradio as gr
from ultralytics import YOLO
import threading

# Load the YOLOv5-small weights once at import time and move the model to the
# GPU when torch reports a usable CUDA device; otherwise run it on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = YOLO("yolov5s.pt").to(device)

def detect_objects(image):
    """Detect objects in an uploaded image and draw labelled boxes on it.

    Args:
        image: RGB image array as delivered by the Gradio image input
            (``type="numpy"``), or ``None`` when nothing has been uploaded.

    Returns:
        A copy of ``image`` with one green rectangle and a
        ``"<class name> <confidence>"`` caption per detection, or ``None``
        if no image was supplied. The input array is left untouched.
    """
    if image is None:
        # Button clicked before an image was uploaded: nothing to annotate.
        return None

    # Gradio hands over RGB, while Ultralytics interprets numpy input as BGR
    # (the OpenCV convention), so convert before running inference.
    results = model(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    detections = results[0].boxes.data.cpu().numpy()

    # Draw on a copy so the caller's array is not modified in place.
    annotated = image.copy()
    for box in detections:
        # Row layout: x1, y1, x2, y2, confidence, class id. Only the
        # coordinates and the class id are integers; the confidence must stay
        # a float, otherwise it is truncated to 0 and always shows "0.00".
        x1, y1, x2, y2 = map(int, box[:4])
        conf = float(box[4])
        cls = int(box[5])
        label = f"{model.names[cls]} {conf:.2f}"
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y1 - 10, 15)
        cv2.putText(annotated, label, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return annotated

# Shared state for real-time webcam processing.
# `frame` holds the most recent annotated image (None until the first capture
# succeeds); every read or write of it goes through `lock`.
lock = threading.Lock()
frame = None
cap = cv2.VideoCapture(0)  # Capture device index 0

def process_webcam():
    """Capture webcam frames forever, annotate them and publish the latest.

    Intended to run as the target of a background thread; it never returns.
    Each successfully read frame is passed through ``model``, its detections
    are drawn onto it, and the result is stored in the module-level ``frame``
    (as RGB) while holding ``lock``.
    """
    global frame
    import time  # only needed for the retry back-off below

    while True:
        ret, img = cap.read()
        if not ret:
            # No frame available (camera missing or busy): back off briefly
            # instead of spinning the loop at full CPU.
            time.sleep(0.05)
            continue

        results = model(img)
        detections = results[0].boxes.data.cpu().numpy()

        for box in detections:
            # Row layout: x1, y1, x2, y2, confidence, class id. Casting the
            # confidence to int would truncate it to 0, so keep it a float.
            x1, y1, x2, y2 = map(int, box[:4])
            conf = float(box[4])
            cls = int(box[5])
            label = f"{model.names[cls]} {conf:.2f}"
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # OpenCV captures in BGR order but Gradio displays arrays as RGB;
        # convert once here so the published frame has correct colours.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        with lock:
            frame = rgb

# Launch the capture loop in the background. It is a daemon thread, so it does
# not keep the interpreter alive once the main thread exits.
_webcam_thread = threading.Thread(target=process_webcam, daemon=True)
_webcam_thread.start()

def get_webcam_frame():
    """Return the most recent processed webcam frame.

    Falls back to a black 480x640 3-channel uint8 image while no frame has
    been published yet.
    """
    # Hold the lock only long enough to grab the current reference.
    with lock:
        latest = frame

    if latest is None:
        return np.zeros((480, 640, 3), dtype=np.uint8)
    return latest

# Gradio UI: one tab showing the live annotated webcam feed, one tab for
# running detection on an uploaded image.
with gr.Blocks() as demo:
    gr.Markdown("# YOLOv5 Real-Time Object Detection")
    with gr.Tabs():
        with gr.Tab("Real-Time Webcam"):
            # get_webcam_frame() returns a single image array, which a
            # gr.Video component cannot display (its value is a video file
            # path). Show it in an Image that re-calls the function on a
            # timer so the view keeps refreshing with the latest frame.
            gr.Image(
                value=get_webcam_frame,
                every=0.1,
                label="Webcam",
                interactive=False,
            )
        with gr.Tab("Upload Image"):
            image_input = gr.Image(type="numpy")
            image_output = gr.Image()
            image_button = gr.Button("Detect Objects")
            image_button.click(detect_objects, inputs=image_input, outputs=image_output)

demo.launch()