File size: 3,722 Bytes
caff61e
bccf53b
dc80d48
0152e0c
 
caff61e
0e19825
0152e0c
0e19825
b5a364c
0e19825
0152e0c
36e1064
0e19825
 
 
 
 
 
0152e0c
 
 
0e19825
 
0152e0c
0e19825
 
 
e82b28e
8513c99
0e19825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8513c99
0e19825
3e3644e
0e19825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8513c99
0e19825
8513c99
 
 
 
0e19825
 
 
 
 
 
 
 
8513c99
0e19825
8513c99
 
0e19825
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import torch
import numpy as np
import gradio as gr
import cv2
import time

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Fetch the pretrained YOLOv5x weights through torch.hub, then place the
# model on the selected device.
model = torch.hub.load("ultralytics/yolov5", "yolov5x", pretrained=True)
model = model.to(device)

# Detection filtering settings stored on the hub model object.
model.classes = None  # no class filter: keep detections of every class
model.iou = 0.3       # IoU threshold used by non-maximum suppression
model.conf = 0.3      # minimum confidence for a detection to be kept

# This script only runs inference, so switch the model to evaluation mode.
model.eval()

# On GPU, convert the weights to FP16 for faster inference.
if device.type == "cuda":
    model.half()

# One fixed colour per class name; seeding NumPy's global RNG makes the
# palette identical on every start-up.
np.random.seed(42)
colors = np.random.uniform(0, 255, size=(len(model.names), 3))

def _ensure_rgb(image):
    """Return *image* as a C-contiguous (H, W, 3) array, keeping channel order.

    Grayscale input (2-D, or 3-D with a single channel) is replicated across
    three channels; any channels beyond the third (e.g. alpha) are dropped.
    """
    if image.ndim == 2:
        image = np.stack((image,) * 3, axis=-1)
    elif image.shape[2] == 1:
        image = np.repeat(image, 3, axis=2)
    elif image.shape[2] > 3:
        image = image[:, :, :3]
    return np.ascontiguousarray(image)


def detect_objects(image):
    """
    Run YOLOv5 on one image and draw the detections on a copy of it.

    Args:
        image: Input image as a numpy array, (H, W) or (H, W, C). The Gradio
            input in this file uses ``type="numpy"``; the array is treated
            as RGB and its channels are never reordered. ``None`` (no image
            supplied) is accepted.

    Returns:
        output_image: Copy of the input with a colored box and a
            "<class name> <confidence>" label per detection plus an FPS
            readout in the top-left corner, or ``None`` when ``image`` is
            ``None``.
    """
    # Nothing was uploaded: return an empty result instead of crashing.
    if image is None:
        return None

    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()

    # Normalize the layout only. The previous "BGR" check compared the red
    # and blue values of the top-left pixel and swapped channels whenever
    # they were equal, which corrupted ordinary RGB images.
    image = _ensure_rgb(image)

    # Create a copy for drawing results
    output_image = image.copy()

    # Inference size handed to the model; the image is not resized here.
    input_size = 640

    # Perform inference with no gradient calculation
    with torch.no_grad():
        results = model(image, size=input_size)

    # Extract detections from first (and only) image
    detections = results.pred[0].cpu().numpy()

    # Height in pixels of the filled strip that holds the label text.
    label_height = 20

    # Draw each detection on the output image
    for *xyxy, conf, cls in detections:
        # Extract coordinates and convert to integers
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)

        # Get color for this class
        color = colors[class_id].tolist()

        # Draw bounding box
        cv2.rectangle(output_image, (x1, y1), (x2, y2), color, 2)

        # Create label with class name and confidence score
        label = f"{model.names[class_id]} {conf:.2f}"

        # Only the text width is needed to size the label background.
        (text_w, _), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Put the label above the box when there is room; otherwise place it
        # just inside the box so it stays visible near the top edge.
        if y1 >= label_height:
            label_top = y1 - label_height
        else:
            label_top = max(y1, 0)
        label_bottom = label_top + label_height

        # Draw label background
        cv2.rectangle(output_image, (x1, label_top),
                      (x1 + text_w, label_bottom), color, -1)

        # Draw label text
        cv2.putText(output_image, label, (x1, label_bottom - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Per-image throughput (inference + drawing); guard a zero interval.
    elapsed = time.perf_counter() - start_time
    fps = 1 / elapsed if elapsed > 0 else float("inf")

    # Add FPS counter to the image
    cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    print(f"Detection complete - FPS: {fps:.2f}")

    return output_image

# Text shown under the title of the web page.
_DESCRIPTION = """
    This system utilizes YOLOv5x to detect 80+ object types from the COCO dataset.
    - Processing speed: Optimized for 30+ FPS at 640x640 resolution
    - Confidence threshold: 0.3
    - IoU threshold: 0.3
    - Color-coded bounding boxes with confidence scores
    """

# Image files offered as one-click examples in the UI.
_EXAMPLE_IMAGES = ["spring_street_after.jpg", "pexels-hikaique-109919.jpg"]

# Input and output widgets both exchange images as numpy arrays.
_upload_box = gr.Image(type="numpy", label="Upload Image")
_result_box = gr.Image(type="numpy", label="Detected Objects")

# Assemble the Gradio app around detect_objects.
iface = gr.Interface(
    fn=detect_objects,
    inputs=_upload_box,
    outputs=_result_box,
    title="Optimized Object Detection with YOLOv5x",
    description=_DESCRIPTION,
    allow_flagging="never",
    examples=_EXAMPLE_IMAGES,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()