"""YOLOv5 object detection demo: ONNX Runtime inference behind a Gradio UI.

Provides single-image detection through the web UI and a webcam loop that
renders annotated frames in an OpenCV window on the machine running the app.
"""

import os
import time
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import onnxruntime as ort
import torch

# Side length (pixels) of the square input the model is fed.
MODEL_INPUT_SIZE = 640
# Detections at or below this confidence are discarded.
CONF_THRESHOLD = 0.6
# IoU above which overlapping boxes of the same class are suppressed.
NMS_IOU_THRESHOLD = 0.45
# Per-class coordinate offset used to keep NMS from mixing classes; it only
# needs to exceed the model input size.
_CLASS_OFFSET = 4096.0

# Set device for ONNX Runtime
providers = (
    ['CUDAExecutionProvider', 'CPUExecutionProvider']
    if torch.cuda.is_available()
    else ['CPUExecutionProvider']
)
session = ort.InferenceSession("models/yolov5n.onnx", providers=providers)
# The input name never changes for a loaded session, so look it up once.
_INPUT_NAME = session.get_inputs()[0].name

# Load model class names
class_names = ["person", "bicycle", "car", "motorcycle", "airplane", "bus",
               "train", "truck", "boat", "traffic light"]  # Modify based on model

# Generate random colors for classes (fixed seed keeps them stable across runs)
np.random.seed(42)
colors = np.random.uniform(0, 255, size=(len(class_names), 3))


def preprocess(image):
    """Turn an HxWx3 uint8 image into a 1x3x640x640 float32 tensor in [0, 1].

    The image is resized without preserving aspect ratio, so box coordinates
    predicted by the model must be rescaled per axis afterwards.
    """
    image = cv2.resize(image, (MODEL_INPUT_SIZE, MODEL_INPUT_SIZE))
    image = image.transpose((2, 0, 1)) / 255.0  # HWC -> CHW, normalize
    image = np.expand_dims(image, axis=0).astype(np.float32)
    # transpose() yields a strided view; hand the runtime a C-contiguous buffer.
    return np.ascontiguousarray(image)


def _decode_detections(raw, conf_threshold=CONF_THRESHOLD,
                       iou_threshold=NMS_IOU_THRESHOLD):
    """Return an (N, 6) array of (x1, y1, x2, y2, conf, cls) in model pixels.

    Two output layouts are accepted:

    * 6 columns: treated as already post-processed (x1, y1, x2, y2, conf, cls)
      and only filtered by confidence.
    * more than 6 columns: treated as the raw YOLOv5 head output
      (cx, cy, w, h, objectness, per-class scores...), which is decoded and
      passed through non-maximum suppression.
      NOTE(review): confirm this matches how the model file was exported.
    """
    empty = np.empty((0, 6), dtype=np.float32)
    raw = np.asarray(raw, dtype=np.float32)
    if raw.ndim == 3:  # drop the batch dimension; one image per call
        raw = raw[0]
    if raw.ndim != 2 or raw.shape[0] == 0 or raw.shape[1] < 6:
        return empty

    if raw.shape[1] == 6:
        return raw[raw[:, 4] > conf_threshold]

    class_scores = raw[:, 5:]
    class_ids = class_scores.argmax(axis=1)
    # Final confidence = objectness * best class probability.
    scores = raw[:, 4] * class_scores[np.arange(raw.shape[0]), class_ids]
    keep_mask = scores > conf_threshold
    if not keep_mask.any():
        return empty

    boxes = raw[keep_mask, :4]
    scores = scores[keep_mask]
    class_ids = class_ids[keep_mask]

    # Center/size -> top-left/size, the layout cv2.dnn.NMSBoxes expects.
    xywh = boxes.copy()
    xywh[:, 0] -= xywh[:, 2] / 2
    xywh[:, 1] -= xywh[:, 3] / 2

    # Shift each class into its own coordinate region so that boxes of
    # different classes never suppress one another.
    nms_boxes = xywh.astype(np.float64)
    nms_boxes[:, :2] += class_ids[:, None] * _CLASS_OFFSET

    kept = cv2.dnn.NMSBoxes(nms_boxes.tolist(), scores.tolist(),
                            conf_threshold, iou_threshold)
    # Return shape differs across OpenCV versions (Nx1, flat, or empty tuple).
    kept = np.asarray(kept, dtype=np.int64).reshape(-1)
    if kept.size == 0:
        return empty

    xywh = xywh[kept]
    return np.column_stack([
        xywh[:, 0],
        xywh[:, 1],
        xywh[:, 0] + xywh[:, 2],
        xywh[:, 1] + xywh[:, 3],
        scores[kept],
        class_ids[kept].astype(np.float32),
    ])


def detect_objects(image):
    """Run detection on an image and return an annotated copy.

    Args:
        image: HxWx3 uint8 array, or None when no image was supplied.

    Returns:
        A copy of ``image`` with boxes, labels and an FPS overlay drawn on it,
        or None when ``image`` is None.
    """
    if image is None:  # "Detect" clicked without an uploaded image
        return None

    start_time = time.perf_counter()
    outputs = session.run(None, {_INPUT_NAME: preprocess(image)})
    detections = _decode_detections(outputs[0])

    output_image = image.copy()
    height, width = image.shape[:2]
    # The model saw a 640x640 stretch of the image; map boxes back per axis.
    scale_x = width / MODEL_INPUT_SIZE
    scale_y = height / MODEL_INPUT_SIZE

    for x1, y1, x2, y2, conf, cls in detections.tolist():
        # Only coordinates and the class id are integers; the confidence must
        # stay a float or the threshold comparison is meaningless.
        if conf <= CONF_THRESHOLD:
            continue
        cls = int(cls)
        left, top = int(x1 * scale_x), int(y1 * scale_y)
        right, bottom = int(x2 * scale_x), int(y2 * scale_y)

        # The model may know more classes than are listed in class_names.
        color = colors[cls % len(colors)].tolist()
        if 0 <= cls < len(class_names):
            name = class_names[cls]
        else:
            name = f"class {cls}"

        cv2.rectangle(output_image, (left, top), (right, bottom), color, 2)
        label = f"{name} {conf:.2f}"
        # Keep the label on-screen for boxes touching the top edge.
        cv2.putText(output_image, label, (left, max(top - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    elapsed = max(time.perf_counter() - start_time, 1e-9)
    fps = 1 / elapsed
    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    return output_image


def real_time_detection():
    """Show live webcam detections in an OpenCV window until 'q' is pressed.

    Blocks until the window loop ends. The camera and window are released even
    if detection raises.
    """
    cap = cv2.VideoCapture(0)
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        cap.set(cv2.CAP_PROP_FPS, 60)

        if not cap.isOpened():
            print("Could not open webcam (device 0)")
            return

        while cap.isOpened():
            start_time = time.perf_counter()
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV captures BGR, while the Gradio path feeds RGB; convert so
            # the model sees the same channel order in both paths.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            annotated = detect_objects(rgb_frame)
            output_frame = cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)

            cv2.imshow("Real-Time Object Detection", output_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            elapsed = max(time.perf_counter() - start_time, 1e-9)
            print(f"FPS: {1 / elapsed:.2f}")
    finally:
        cap.release()
        cv2.destroyAllWindows()


with gr.Blocks(title="YOLOv5 Real-Time Object Detection") as demo:
    gr.Markdown("""
    # Real-Time Object Detection with YOLOv5
    **Upload an image or run real-time detection**
    """)

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Upload Image", type="numpy")
            detect_button = gr.Button("Detect Objects")
            start_rt_button = gr.Button("Start Real-Time Detection")
        with gr.Column():
            output_image = gr.Image(label="Detection Results", type="numpy")

    detect_button.click(detect_objects, inputs=input_image, outputs=output_image)
    start_rt_button.click(real_time_detection, None, None)

demo.launch()