import torch
import numpy as np
import gradio as gr
from PIL import Image
import cv2

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Fetch the pretrained YOLOv5s model through torch.hub, then move it to the
# selected device.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
model = model.to(device)

# Detection settings: a confidence threshold of 0.5, plus half precision
# whenever the model lives on a CUDA device.
model.conf = 0.5
if device.type == 'cuda':
    model.half()

def process_frame(video):
    """Run YOLOv5 detection on the first frame of a video source.

    Args:
        video: Source handed to ``cv2.VideoCapture`` (for example a video
            file path), or ``None`` when there is nothing to process.

    Returns:
        The annotated frame as an RGB ``numpy`` array. If detection fails,
        the unannotated frame is returned instead (converted to RGB whenever
        the colour conversion itself succeeded). ``None`` is returned when
        ``video`` is ``None``, the source cannot be opened, or no frame can
        be read.
    """
    if video is None:
        # Nothing to open; bail out before touching OpenCV.
        return None

    cap = cv2.VideoCapture(video)
    try:
        if not cap.isOpened():
            print("Error: Could not open video stream.")
            return None
        ret, frame = cap.read()
    finally:
        # Release the capture handle on every path, including a failed open
        # or an exception raised while reading.
        cap.release()

    if not ret:
        print("Error: Could not read frame.")
        return None

    # Image returned if anything below fails. It starts as the raw frame and
    # is upgraded to the RGB version as soon as the conversion has succeeded,
    # so the caller is not handed a BGR image with swapped colours.
    fallback = frame
    try:
        print("Processing frame...")
        # The frame is treated as BGR (hence COLOR_BGR2RGB); the model input
        # and the returned image use RGB channel order.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        fallback = rgb_frame
        image_pil = Image.fromarray(rgb_frame)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            results = model(image_pil)

        rendered_images = results.render()
        processed_image = np.array(rendered_images[0]) if rendered_images else rgb_frame
        print("Frame processed successfully!")
        return processed_image

    except Exception as e:
        # Best-effort boundary for the UI callback: report the problem and
        # still show the (unannotated) frame rather than failing the request.
        print(f"Processing error: {e}")
        return fallback

def process_uploaded_image(image):
    """Annotate an uploaded image with YOLOv5 detections.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when
            nothing was uploaded.

    Returns:
        ``None`` if ``image`` is ``None``. Otherwise the first rendered
        result as a ``numpy`` array, or the input ``image`` unchanged when
        rendering yields nothing or any step raises.
    """
    # Guard clause: no upload means nothing to show.
    if image is None:
        return None

    try:
        pil_input = Image.fromarray(image)

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            detections = model(pil_input)

        annotated = detections.render()
        if annotated:
            return np.array(annotated[0])
        return image

    except Exception as e:
        # Report the failure and fall back to the untouched upload.
        print(f"Error processing image: {e}")
        return image

# Build the Gradio UI: one tab for camera/video input, one for image upload.
with gr.Blocks(title="Real-Time Object Detection") as app:
    gr.Markdown("# Real-Time Object Detection with Dual Input")

    with gr.Tabs():
        # 📷 Camera tab: a video input next to the annotated frame.
        with gr.TabItem("📷 Live Camera"):
            with gr.Row():
                webcam_input = gr.Video(label="Live Feed")
                live_output = gr.Image(label="Processed Feed")
            # gr.Video does not provide a `stream` event listener, so calling
            # `.stream(...)` here fails while the UI is being built. `change`
            # fires whenever the component's value changes, which hands the
            # new value to process_frame.
            webcam_input.change(process_frame, inputs=webcam_input, outputs=live_output)

        # 🖼️ Image upload tab: detection runs when Submit is clicked.
        with gr.TabItem("🖼️ Image Upload"):
            with gr.Row():
                upload_input = gr.Image(type="numpy", label="Upload Image")
                submit_button = gr.Button("Submit")
                upload_output = gr.Image(label="Detection Result")

            submit_button.click(process_uploaded_image, inputs=upload_input, outputs=upload_output)

# Enable request queuing and serve on all interfaces, port 7860, without a
# public share link.
app.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)