import os
import time
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import onnxruntime
import torch

# Load YOLOv5 model without AutoShape (autoshape=False) so the raw
# DetectionModel can be traced for ONNX export
model = torch.hub.load("ultralytics/yolov5", "yolov5n", autoshape=False)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Fuse Conv+BN layers for optimization
model.fuse()

# Export to ONNX format
os.makedirs("models", exist_ok=True)
model_path = Path("models/yolov5n.onnx")
torch.onnx.export(
    model,
    torch.zeros(1, 3, 640, 640).to(device),  # Dummy input tensor
    str(model_path),
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=["images"],
    output_names=["output"],
    dynamic_axes={"images": {0: "batch_size"}, "output": {0: "batch_size"}},
)

# Load ONNX model for inference; fall back to CPU if CUDA is unavailable
session = onnxruntime.InferenceSession(
    str(model_path),
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Generate a fixed random color for each of the 80 COCO classes
np.random.seed(42)
colors = np.random.uniform(0, 255, size=(80, 3))

total_inference_time = 0.0
inference_count = 0

CONF_THRESHOLD = 0.3
IOU_THRESHOLD = 0.45


def detect_objects(image):
    global total_inference_time, inference_count

    if image is None:
        return None

    start_time = time.time()

    # Preprocess: resize to export resolution, scale to [0, 1], HWC -> NCHW
    original_shape = image.shape
    input_shape = (640, 640)
    image_resized = cv2.resize(image, input_shape)
    image_norm = image_resized.astype(np.float32) / 255.0
    image_transposed = np.transpose(image_norm, (2, 0, 1))
    image_batch = np.expand_dims(image_transposed, axis=0)

    # Get input name and run inference
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: image_batch})

    # Decode raw YOLOv5 output: rows of [cx, cy, w, h, objectness, 80 class
    # scores]. The exported model has no built-in NMS, so confidence filtering
    # and NMS happen here rather than treating rows as finished detections.
    pred = outputs[0][0]  # First batch, all candidate boxes
    class_scores = pred[:, 5:] * pred[:, 4:5]  # class prob * objectness
    class_ids = class_scores.argmax(axis=1)
    confidences = class_scores.max(axis=1)
    keep = confidences > CONF_THRESHOLD
    pred, class_ids, confidences = pred[keep], class_ids[keep], confidences[keep]

    # Convert center-format xywh to top-left xywh for cv2.dnn.NMSBoxes
    boxes = np.empty((len(pred), 4), dtype=np.float32)
    boxes[:, 0] = pred[:, 0] - pred[:, 2] / 2
    boxes[:, 1] = pred[:, 1] - pred[:, 3] / 2
    boxes[:, 2] = pred[:, 2]
    boxes[:, 3] = pred[:, 3]
    indices = cv2.dnn.NMSBoxes(
        boxes.tolist(), confidences.tolist(), CONF_THRESHOLD, IOU_THRESHOLD
    )

    # Calculate timing
    inference_time = time.time() - start_time
    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count
    fps = 1 / inference_time

    # Create a copy of the original image for visualization
    output_image = image.copy()

    # Scale factors from the 640x640 input back to the original resolution
    scale_x = original_shape[1] / input_shape[0]
    scale_y = original_shape[0] / input_shape[1]

    # Draw bounding boxes and labels for detections surviving NMS
    for i in np.array(indices).flatten():
        x, y, w, h = boxes[i]
        x1, y1 = int(x * scale_x), int(y * scale_y)
        x2, y2 = int((x + w) * scale_x), int((y + h) * scale_y)
        class_id = int(class_ids[i])

        color = tuple(map(int, colors[class_id]))
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)
        label = f"Class {class_id} {confidences[i]:.2f}"
        cv2.putText(output_image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Display current and average FPS
    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(output_image, f"Avg FPS: {1 / avg_inference_time:.2f}", (20, 70),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return output_image


# Gradio Interface
example_images = ["spring_street_after.jpg", "pexels-hikaique-109919.jpg"]
os.makedirs("examples", exist_ok=True)

with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("# **Optimized YOLOv5 Object Detection** 🚀")
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            submit_button = gr.Button("Detect Objects", variant="primary")
            clear_button = gr.Button("Clear")
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")

    gr.Examples(
        examples=example_images,
        inputs=input_image,
        outputs=output_image,
        fn=detect_objects,
        cache_examples=True,
    )

    submit_button.click(fn=detect_objects, inputs=input_image, outputs=output_image)
    clear_button.click(lambda: (None, None), None, [input_image, output_image])

demo.launch()
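
# Optional sketch (not exercised above): onnxruntime graph optimizations can
# also be configured explicitly through SessionOptions. ORT_ENABLE_ALL is
# already the default level; this only makes the setting visible if you want
# to experiment with other levels.
#
# sess_options = onnxruntime.SessionOptions()
# sess_options.graph_optimization_level = (
#     onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
# )
# session = onnxruntime.InferenceSession(
#     str(model_path), sess_options,
#     providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
# )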