import os
import time
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import onnxruntime
import torch

# Load YOLOv5 model without AutoShape (autoshape=False) so the raw
# DetectionModel can be traced for ONNX export
model = torch.hub.load("ultralytics/yolov5", "yolov5n", autoshape=False)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Fuse Conv+BN layers for optimization
model.fuse()

# Export to ONNX format
os.makedirs("models", exist_ok=True)
model_path = Path("models/yolov5n.onnx")
torch.onnx.export(
    model,
    torch.zeros(1, 3, 640, 640).to(device),  # Dummy input tensor
    str(model_path),
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=["images"],
    output_names=["output"],
    dynamic_axes={"images": {0: "batch_size"}, "output": {0: "batch_size"}},
)

# Load ONNX model for inference; fall back to CPU if CUDA is unavailable
session = onnxruntime.InferenceSession(
    str(model_path),
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Generate a fixed random color for each of the 80 COCO classes
np.random.seed(42)
colors = np.random.uniform(0, 255, size=(80, 3))

total_inference_time = 0.0
inference_count = 0

CONF_THRESHOLD = 0.3
IOU_THRESHOLD = 0.45


def detect_objects(image):
    global total_inference_time, inference_count

    if image is None:
        return None

    start_time = time.time()

    # Preprocess: resize to export resolution, scale to [0, 1], HWC -> NCHW
    original_shape = image.shape
    input_shape = (640, 640)
    image_resized = cv2.resize(image, input_shape)
    image_norm = image_resized.astype(np.float32) / 255.0
    image_transposed = np.transpose(image_norm, (2, 0, 1))
    image_batch = np.expand_dims(image_transposed, axis=0)

    # Get input name and run inference
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: image_batch})

    # Decode raw YOLOv5 output: rows of [cx, cy, w, h, objectness, 80 class
    # scores]. The exported model has no built-in NMS, so confidence filtering
    # and NMS happen here rather than treating rows as finished detections.
    pred = outputs[0][0]  # First batch, all candidate boxes
    class_scores = pred[:, 5:] * pred[:, 4:5]  # class prob * objectness
    class_ids = class_scores.argmax(axis=1)
    confidences = class_scores.max(axis=1)
    keep = confidences > CONF_THRESHOLD
    pred, class_ids, confidences = pred[keep], class_ids[keep], confidences[keep]

    # Convert center-format xywh to top-left xywh for cv2.dnn.NMSBoxes
    boxes = np.empty((len(pred), 4), dtype=np.float32)
    boxes[:, 0] = pred[:, 0] - pred[:, 2] / 2
    boxes[:, 1] = pred[:, 1] - pred[:, 3] / 2
    boxes[:, 2] = pred[:, 2]
    boxes[:, 3] = pred[:, 3]
    indices = cv2.dnn.NMSBoxes(
        boxes.tolist(), confidences.tolist(), CONF_THRESHOLD, IOU_THRESHOLD
    )

    # Calculate timing
    inference_time = time.time() - start_time
    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count
    fps = 1 / inference_time

    # Create a copy of the original image for visualization
    output_image = image.copy()

    # Scale factors from the 640x640 input back to the original resolution
    scale_x = original_shape[1] / input_shape[0]
    scale_y = original_shape[0] / input_shape[1]

    # Draw bounding boxes and labels for detections surviving NMS
    for i in np.array(indices).flatten():
        x, y, w, h = boxes[i]
        x1, y1 = int(x * scale_x), int(y * scale_y)
        x2, y2 = int((x + w) * scale_x), int((y + h) * scale_y)
        class_id = int(class_ids[i])

        color = tuple(map(int, colors[class_id]))
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)
        label = f"Class {class_id} {confidences[i]:.2f}"
        cv2.putText(output_image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Display current and average FPS
    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(output_image, f"Avg FPS: {1 / avg_inference_time:.2f}", (20, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return output_image


# Gradio Interface
example_images = ["spring_street_after.jpg", "pexels-hikaique-109919.jpg"]
os.makedirs("examples", exist_ok=True)

with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("# **Optimized YOLOv5 Object Detection** 🚀")
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            submit_button = gr.Button("Detect Objects", variant="primary")
            clear_button = gr.Button("Clear")
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")

    gr.Examples(
        examples=example_images,
        inputs=input_image,
        outputs=output_image,
        fn=detect_objects,
        cache_examples=True,
    )

    submit_button.click(fn=detect_objects, inputs=input_image, outputs=output_image)
    clear_button.click(lambda: (None, None), None, [input_image, output_image])

demo.launch()
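
# Optional sketch (not exercised above): onnxruntime graph optimizations can
# also be configured explicitly through SessionOptions. ORT_ENABLE_ALL is
# already the default level; this only makes the setting visible if you want
# to experiment with other levels.
#
# sess_options = onnxruntime.SessionOptions()
# sess_options.graph_optimization_level = (
#     onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
# )
# session = onnxruntime.InferenceSession(
#     str(model_path), sess_options,
#     providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
# )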