import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
import onnxruntime
from pathlib import Path

# Load YOLOv5n from the Ultralytics hub without the AutoShape wrapper,
# so we get the raw nn.Module needed for ONNX export
model = torch.hub.load("ultralytics/yolov5", "yolov5n", autoshape=False)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Fuse layers for optimization
model.fuse()

# Export to ONNX format
os.makedirs("models", exist_ok=True)
model_path = Path("models/yolov5n.onnx")

torch.onnx.export(
    model,
    torch.zeros(1, 3, 640, 640).to(device),  # Dummy input used for tracing
    str(model_path),
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=["images"],
    output_names=["output"],
    dynamic_axes={"images": {0: "batch_size"}, "output": {0: "batch_size"}}
)
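
# Optional sanity check of the exported graph (assumes the `onnx` package
# is installed; it is not used elsewhere in this script):
#   import onnx; onnx.checker.check_model(onnx.load(str(model_path)))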

# Load ONNX model for inference, falling back to CPU if CUDA is unavailable
session = onnxruntime.InferenceSession(
    str(model_path),
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Class names from the loaded model (80 COCO classes) and a fixed random color per class
class_names = model.names
np.random.seed(42)
colors = np.random.uniform(0, 255, size=(80, 3))

total_inference_time = 0
inference_count = 0

def detect_objects(image):
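    """Run ONNX inference on one RGB image and return it annotated with boxes and FPS."""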
    global total_inference_time, inference_count
    if image is None:
        return None
        
    start_time = time.time()
    
    # Preprocess image
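    # (Gradio's numpy images arrive as RGB, which matches YOLOv5's expected channel order)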
    original_shape = image.shape
    input_shape = (640, 640)
    image_resized = cv2.resize(image, input_shape)
    image_norm = image_resized.astype(np.float32) / 255.0
    image_transposed = np.transpose(image_norm, (2, 0, 1))
    image_batch = np.expand_dims(image_transposed, axis=0)
    
    # Get input name and run inference
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: image_batch})
    
    # Raw YOLOv5 output: one row per candidate box,
    # [cx, cy, w, h, objectness, 80 class scores] in 640x640 coordinates
    detections = outputs[0][0]  # First batch, all candidates
    
    # Calculate timing
    inference_time = time.time() - start_time
    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count
    fps = 1 / inference_time
    
    # Create a copy of the original image for visualization
    output_image = image.copy()
    
    # Scale factors from the 640x640 network input back to the original image
    scale_x = original_shape[1] / input_shape[0]
    scale_y = original_shape[0] / input_shape[1]
    
    # Decode candidates: combine objectness with the best class score,
    # apply a confidence threshold, and convert center-format boxes to
    # top-left corner format for OpenCV
    boxes, scores, class_ids = [], [], []
    for det in detections:
        class_scores = det[5:]
        class_id = int(np.argmax(class_scores))
        conf = float(det[4] * class_scores[class_id])
        if conf < 0.3:  # Confidence threshold
            continue
        cx, cy, w, h = det[:4]
        boxes.append([int((cx - w / 2) * scale_x), int((cy - h / 2) * scale_y),
                      int(w * scale_x), int(h * scale_y)])
        scores.append(conf)
        class_ids.append(class_id)
    
    # Non-maximum suppression to drop overlapping boxes
    indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=0.3, nms_threshold=0.45) if boxes else []
    
    # Draw the surviving boxes and labels
    for i in np.array(indices).flatten():
        x, y, w, h = boxes[i]
        class_id = class_ids[i]
        color = tuple(map(int, colors[class_id]))
        cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
        label = f"{class_names[class_id]} {scores[i]:.2f}"
        cv2.putText(output_image, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    
    # Display FPS
    cv2.putText(output_image, f"FPS: {fps:.2f}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(output_image, f"Avg FPS: {1/avg_inference_time:.2f}", (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
    return output_image

# Gradio Interface
# Only offer example images that actually exist next to this script
example_images = [p for p in ("spring_street_after.jpg", "pexels-hikaique-109919.jpg") if os.path.exists(p)]

with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("# **Optimized YOLOv5 Object Detection** πŸš€")
    
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            submit_button = gr.Button("Detect Objects", variant="primary")
            clear_button = gr.Button("Clear")

        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")

    if example_images:
        gr.Examples(
            examples=example_images,
            inputs=input_image,
            outputs=output_image,
            fn=detect_objects,
            cache_examples=True,
        )

    submit_button.click(fn=detect_objects, inputs=input_image, outputs=output_image)
    clear_button.click(lambda: (None, None), None, [input_image, output_image])

if __name__ == "__main__":
    demo.launch()