File size: 7,367 Bytes
caff61e
bccf53b
dc80d48
0152e0c
 
a186d85
 
caff61e
a186d85
 
 
 
0152e0c
0e19825
b5a364c
a186d85
 
 
 
 
 
 
 
 
 
 
36e1064
0e19825
 
 
 
 
a186d85
0152e0c
a186d85
 
 
 
 
0152e0c
0e19825
 
0152e0c
0e19825
 
6fb7418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e82b28e
a186d85
 
 
 
8513c99
0e19825
a186d85
0e19825
a186d85
 
 
 
0e19825
 
 
 
a186d85
0e19825
 
 
8513c99
a186d85
0e19825
3e3644e
a186d85
 
 
 
 
 
0e19825
 
 
 
 
 
 
6fb7418
0e19825
6fb7418
0e19825
 
 
6fb7418
6fea677
0e19825
6fea677
6fb7418
 
 
 
 
0e19825
6fea677
 
6fb7418
6fea677
 
0e19825
a186d85
 
0e19825
6fb7418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6fea677
6fb7418
 
 
 
 
 
 
 
0e19825
 
8513c99
a186d85
 
6fb7418
 
a186d85
 
 
 
 
 
 
 
 
 
 
 
 
0e19825
 
 
a186d85
6fb7418
a186d85
 
 
 
 
 
6fb7418
 
a186d85
 
 
 
 
 
 
 
 
 
 
 
6fea677
6fb7418
 
 
 
 
 
 
 
 
8513c99
a186d85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import torch
import numpy as np
import gradio as gr
import cv2
import time
import os
from pathlib import Path

# Create cache directory for models if it doesn't exist
os.makedirs("models", exist_ok=True)

# Check device availability - Hugging Face Spaces often provides GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load YOLOv5x through the hub's "custom" entry point, pointing it at the cache
# file.  The previous code could never reload its own cache:
#   * source="local" makes torch.hub treat the first argument as a local
#     directory, so the GitHub spec "ultralytics/yolov5" was not resolvable;
#   * the "yolov5x" entry point does not accept a `path` keyword;
#   * the cache was written as a bare state_dict, which is not a YOLOv5
#     checkpoint and cannot be loaded by the hub loader.
# With "custom", YOLOv5 loads the checkpoint at `path` when it exists and
# otherwise fetches the official release asset of the same file name to that
# location, so the very same call covers both the cold and the warm start.
# NOTE(review): a models/yolov5x.pt left behind by the old state_dict caching
# is not a valid checkpoint - delete it once before deploying this version.
model_path = Path("models/yolov5x.pt")
if model_path.exists():
    print(f"Loading model from cache: {model_path}")
else:
    print("Downloading YOLOv5x model and caching...")
model = torch.hub.load("ultralytics/yolov5", "custom", path=str(model_path)).to(device)

# Optimization configurations
model.conf = 0.3  # Confidence threshold of 0.3 as specified
model.iou = 0.3   # NMS IoU threshold of 0.3 as specified
model.classes = None  # No class filter: keep detections of every class

# Optimize for GPU if available
if device.type == "cuda":
    # Use half precision weights for a performance boost
    model.half()
else:
    # On CPU, use every available core; cpu_count() may return None, in which
    # case fall back to a single thread instead of raising a TypeError
    torch.set_num_threads(os.cpu_count() or 1)

# Set model to evaluation mode for inference
model.eval()

# One fixed, vivid color per class so a class is always drawn in the same hue
np.random.seed(42)  # For reproducible colors


def _class_color(index, total):
    """Return the ``[b, g, r]`` color (ints in 0..255) for class ``index``.

    Hues are spread evenly over the color wheel (``index / total`` of a full
    turn) at saturation 0.9 and value 1.0, then converted from HSV by hand.
    """
    saturation = 0.9
    value = 1.0

    degrees = (index / total) * 360
    chroma = value * saturation
    secondary = chroma * (1 - abs((degrees / 60) % 2 - 1))
    offset = value - chroma

    # (r, g, b) before the offset is added, one entry per 60-degree sector
    by_sector = (
        (chroma, secondary, 0),
        (secondary, chroma, 0),
        (0, chroma, secondary),
        (0, secondary, chroma),
        (secondary, 0, chroma),
        (chroma, 0, secondary),
    )
    # Number of sector boundaries at or below the hue == index of its sector
    sector = sum(degrees >= edge for edge in (60, 120, 180, 240, 300))
    red, green, blue = by_sector[sector]

    red, green, blue = (red + offset) * 255, (green + offset) * 255, (blue + offset) * 255
    return [int(blue), int(green), int(red)]  # OpenCV uses BGR


colors = [_class_color(idx, len(model.names)) for idx in range(len(model.names))]

# Track performance metrics (accumulated over every call to detect_objects)
total_inference_time = 0
inference_count = 0


def _clip(value, upper):
    """Clamp ``value`` into the closed range ``[0, upper]``."""
    return min(max(value, 0), upper)


def _draw_detection(canvas, x1, y1, x2, y2, label, color):
    """Draw one bounding box plus its translucent caption onto ``canvas`` in place."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    font_thickness = 2
    caption_alpha = 0.7

    cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 3)

    (text_w, text_h), _ = cv2.getTextSize(label, font, font_scale, font_thickness)

    # The caption normally sits on top of the box.  When the box touches the
    # top edge that position is off-canvas, so move the caption just inside
    # the box to keep it visible.
    top = y1 - text_h - 10
    if top < 0:
        top = max(y1, 0)
    bottom = top + text_h + 10

    # Blend only the caption rectangle (clipped to the image) instead of
    # copying and re-blending the whole frame for every detection.
    img_h, img_w = canvas.shape[:2]
    rows = slice(_clip(top, img_h), _clip(bottom + 1, img_h))
    cols = slice(_clip(x1, img_w), _clip(x1 + text_w + 11, img_w))
    region = canvas[rows, cols]
    if region.size:
        # Drawing needs a contiguous buffer, hence the copy of the view
        tinted = region.copy()
        cv2.rectangle(tinted, (0, 0),
                      (tinted.shape[1] - 1, tinted.shape[0] - 1), color, -1)
        canvas[rows, cols] = cv2.addWeighted(
            tinted, caption_alpha, region, 1 - caption_alpha, 0)

    # Black outline first, then white text on top for readability
    text_origin = (x1 + 5, bottom - 5)
    cv2.putText(canvas, label, text_origin,
                font, font_scale, (0, 0, 0), font_thickness + 1)
    cv2.putText(canvas, label, text_origin,
                font, font_scale, (255, 255, 255), font_thickness)


def _draw_stats_panel(canvas, current_fps, average_fps):
    """Draw the translucent "Performance" panel in the top-left corner, in place."""
    fps_bg_height = 90
    fps_bg_width = 200

    # Panel area, clipped automatically by slicing when the image is small
    rows = slice(10, 10 + fps_bg_height)
    cols = slice(10, fps_bg_width + 1)
    region = canvas[rows, cols]
    if region.size:
        shaded = region.copy()
        # Vertical gradient: the green channel fades from 220 towards 0
        for row in range(shaded.shape[0]):
            color_value = int(220 * (1 - row / fps_bg_height))
            cv2.rectangle(shaded, (0, row), (shaded.shape[1] - 1, row),
                          (40, color_value, 40), -1)
        # Blend the gradient over the frame.  The previous call passed `dst`
        # both positionally and by keyword, which raises on every request.
        canvas[rows, cols] = cv2.addWeighted(shaded, 0.8, region, 0.2, 0)

    cv2.rectangle(canvas,
                  (10, 10),
                  (fps_bg_width, 10 + fps_bg_height),
                  (255, 255, 255),
                  2,
                  cv2.LINE_AA)

    cv2.putText(canvas, "Performance", (20, 35),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)

    cv2.putText(canvas, f"Current: {current_fps:.1f} FPS", (20, 65),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)

    cv2.putText(canvas, f"Average: {average_fps:.1f} FPS", (20, 90),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)


def detect_objects(image):
    """Run the detector on ``image`` and return an annotated copy.

    Args:
        image: Frame handed over by the Gradio image input (a numpy array),
            or ``None`` when no image is present.

    Returns:
        A new array with a box and "<class name> <confidence>" caption per
        detection plus a panel showing the FPS of this call and the running
        average, or ``None`` when ``image`` is ``None``.  The input array is
        not modified.

    Side effects:
        Updates the module-level ``total_inference_time`` and
        ``inference_count`` used for the running average.
    """
    global total_inference_time, inference_count

    if image is None:
        return None

    # Fixed input size for optimal processing
    input_size = 640

    # Time the model call only; perf_counter is monotonic and high resolution
    start_time = time.perf_counter()
    with torch.no_grad():
        results = model(image, size=input_size)
    # Clamp so the FPS divisions below can never divide by zero
    inference_time = max(time.perf_counter() - start_time, 1e-9)

    total_inference_time += inference_time
    inference_count += 1
    avg_inference_time = total_inference_time / inference_count

    # Draw on a copy so the caller's array stays untouched
    output_image = image.copy()

    # Extract detections from first (and only) image
    detections = results.pred[0].cpu().numpy()

    # NOTE(review): `colors` is stored in BGR order while Gradio presumably
    # delivers RGB frames, so hues render channel-swapped; harmless because
    # the palette is arbitrary, but confirm if exact colors ever matter.
    for *xyxy, conf, cls in detections:
        x1, y1, x2, y2 = map(int, xyxy)
        class_id = int(cls)
        label = f"{model.names[class_id]} {conf:.2f}"
        _draw_detection(output_image, x1, y1, x2, y2, label, colors[class_id])

    _draw_stats_panel(output_image, 1 / inference_time, 1 / avg_inference_time)

    return output_image

# Define example images - these will be stored in the same directory as this script
example_images = [
    "spring_street_after.jpg", 
    "pexels-hikaique-109919.jpg"
]

# With cache_examples=True every example is run through the model while the
# interface is built, so a single missing file would abort startup.  Keep only
# the examples that are actually present and report the rest.
missing_examples = [name for name in example_images if not Path(name).is_file()]
if missing_examples:
    print(f"Skipping missing example images: {missing_examples}")
example_images = [name for name in example_images if name not in missing_examples]

# Make sure example directory exists
# NOTE(review): the example paths above do not point into this directory;
# kept only because creating it is an existing startup side effect.
os.makedirs("examples", exist_ok=True)

# Create Gradio interface - optimized for Hugging Face Spaces
with gr.Blocks(title="Optimized YOLOv5 Object Detection") as demo:
    gr.Markdown("""
    # Optimized YOLOv5 Object Detection
    
    This system utilizes YOLOv5 to detect 80+ object types from the COCO dataset.
    
    **Performance Features:**
    - Processing speed: Optimized for 30+ FPS at 640x640 resolution
    - Confidence threshold: 0.3
    - IoU threshold: 0.3
    
    Upload an image, then click Submit to see the detections!
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", type="numpy")
            with gr.Row():
                clear_button = gr.Button("Clear", size="sm")
                submit_button = gr.Button("Submit", variant="primary", size="lg")
                
        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Objects", type="numpy")
    
    # Only build the examples widget when at least one example file exists
    if example_images:
        gr.Examples(
            examples=example_images,
            inputs=input_image,
            outputs=output_image,
            fn=detect_objects,
            cache_examples=True  # Cache for faster response
        )
    
    submit_button.click(fn=detect_objects, inputs=input_image, outputs=output_image)

    # Clearing just resets both image components.  The previous handler chained
    # a detect_objects call afterwards, which only ever received the freshly
    # cleared (None) input and returned None again - a wasted extra event.
    clear_button.click(
        fn=lambda: (None, None),
        outputs=[input_image, output_image],
        queue=False
    )

# Launch for Hugging Face Spaces
demo.launch()