import torch
import numpy as np
import gradio as gr
import cv2
import time
# Check device availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load YOLOv5x model (larger model for better accuracy)
model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True).to(device)
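# Note: the first call to torch.hub.load() downloads the ultralytics/yolov5
# repo and the yolov5x weights, then caches them (by default under
# ~/.cache/torch/hub), so later startups are much faster.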
# Optimization configurations
model.conf = 0.3 # Confidence threshold of 0.3 as specified
model.iou = 0.3 # NMS IoU threshold of 0.3 as specified
model.classes = None  # Detect all 80 COCO classes
# Enable half-precision for GPU acceleration
if device.type == "cuda":
    model.half()  # Use FP16 for performance boost
# Set model to evaluation mode for inference
model.eval()
# Assign fixed colors to each class for consistent visualization
np.random.seed(42) # For reproducible colors
colors = np.random.uniform(0, 255, size=(len(model.names), 3))
def detect_objects(image):
"""
Process input image for object detection using YOLOv5
Args:
image: Input image as numpy array
Returns:
output_image: Image with detection results visualized
"""
start_time = time.time()
    # Gradio's gr.Image (type="numpy") supplies RGB arrays, so no BGR-to-RGB
    # conversion is needed; just promote grayscale input to three channels.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    # Create a copy for drawing results
    output_image = image.copy()

    # Resize input to 640x640 for optimal processing speed
    input_size = 640

    # Perform inference with no gradient calculation
    with torch.no_grad():
        results = model(image, size=input_size)

    # Extract detections from the first (and only) image
    detections = results.pred[0].cpu().numpy()
    # Draw each detection on the output image
    for *xyxy, conf, cls in detections:
        # Extract coordinates and convert to integers
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)

        # Get the fixed color for this class
        color = colors[class_id].tolist()

        # Draw bounding box
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)

        # Create label with class name and confidence score
        label = f"{model.names[class_id]} {conf:.2f}"

        # Calculate text size for the label background rectangle
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Draw label background
        cv2.rectangle(output_image, (x1, y1 - 20), (x1 + w, y1), color, -1)

        # Draw label text
        cv2.putText(output_image, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
    # Calculate and display FPS for this image
    fps = 1 / (time.time() - start_time)

    # Add FPS counter to the image
    cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    print(f"Detection complete - FPS: {fps:.2f}")

    return output_image
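
# Quick local test sketch ("test.jpg" is a hypothetical filename, not part of
# this Space). cv2.imread returns BGR, so convert to RGB before detection:
#   img = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)
#   annotated = detect_objects(img)
#   cv2.imwrite("result.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))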
# Create Gradio interface
iface = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=gr.Image(type="numpy", label="Detected Objects"),
    title="Optimized Object Detection with YOLOv5x",
    description="""
    This system uses YOLOv5x to detect the 80 object categories of the COCO dataset.
    - Processing speed: targets 30+ FPS at 640x640 resolution on a CUDA GPU
    - Confidence threshold: 0.3
    - IoU threshold: 0.3
    - Color-coded bounding boxes with confidence scores
    """,
    allow_flagging="never",
    examples=["spring_street_after.jpg", "pexels-hikaique-109919.jpg"],
)
# Launch the interface
if __name__ == "__main__":
    iface.launch()