import cv2 as cv
import numpy as np
import gradio as gr
from huggingface_hub import hf_hub_download
from yolox import YoloX  # YoloX wrapper class (local yolox.py, as shipped with the OpenCV model zoo demos)

# Download the YOLOX ONNX weights from the Hugging Face Hub (cached locally by hf_hub_download)
model_path = hf_hub_download(
    repo_id="opencv/object_detection_yolox",
    filename="object_detection_yolox_2022nov.onnx"
)

# Initialize YOLOX model
model = YoloX(
    modelPath=model_path,
    confThreshold=0.5,
    nmsThreshold=0.5,
    objThreshold=0.5,
    backendId=cv.dnn.DNN_BACKEND_OPENCV,
    targetId=cv.dnn.DNN_TARGET_CPU
)
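# DNN_BACKEND_OPENCV with DNN_TARGET_CPU runs inference on the CPU; builds of OpenCV
# with CUDA support could use cv.dnn.DNN_BACKEND_CUDA / cv.dnn.DNN_TARGET_CUDA instead.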

# The 80 COCO class names, in the index order emitted by the YOLOX model
classes = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
           'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
           'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
           'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
           'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
           'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
           'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
           'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')

def letterbox(srcimg, target_size=(640, 640)):
    """Resize srcimg to fit target_size while preserving aspect ratio; the unused
    bottom/right area is filled with the padding value 114."""
    padded_img = np.ones((target_size[0], target_size[1], 3), dtype=np.float32) * 114.0
    ratio = min(target_size[0] / srcimg.shape[0], target_size[1] / srcimg.shape[1])
    resized_img = cv.resize(srcimg, (int(srcimg.shape[1] * ratio), int(srcimg.shape[0] * ratio)), interpolation=cv.INTER_LINEAR).astype(np.float32)
    padded_img[:int(srcimg.shape[0] * ratio), :int(srcimg.shape[1] * ratio)] = resized_img
    return padded_img, ratio

def unletterbox(bbox, scale):
    """Map box coordinates from the letterboxed frame back to the original image."""
    return bbox / scale

def visualize(dets, image, scale):
    """Draw detection boxes and class labels on a copy of the (BGR) image."""
    res_img = image.copy()
    h, w = res_img.shape[:2]
    font_scale = max(0.5, min(w, h) / 640.0 * 0.5)
    thickness = max(1, int(font_scale * 2))

    # Each detection row is [x1, y1, x2, y2, score, class_id] in letterboxed coordinates.
    for det in dets:
        box = unletterbox(det[:4], scale).astype(np.int32)
        score = det[-2]
        cls_id = int(det[-1])

        x0, y0, x1, y1 = box
        label = '{}:{:.1f}%'.format(classes[cls_id], score * 100)

        cv.rectangle(res_img, (x0, y0), (x1, y1), (0, 255, 0), thickness)
        (tw, th), _ = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        cv.rectangle(res_img, (x0, y0), (x0 + tw + 2, y0 + th + 4), (255, 255, 255), -1)
        cv.putText(res_img, label, (x0, y0 + th), cv.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), thickness)

    return res_img

def detect_objects(input_image):
    # Gradio supplies an RGB array; keep a BGR copy for OpenCV drawing.
    bgr = cv.cvtColor(input_image, cv.COLOR_RGB2BGR)
    # The detector is fed the letterboxed RGB image.
    input_blob, scale = letterbox(input_image)

    results = model.infer(input_blob)
    if results is None or len(results) == 0:
        # No detections: return the untouched input.
        return input_image

    vis_image = visualize(results, bgr, scale)
    return cv.cvtColor(vis_image, cv.COLOR_BGR2RGB)

def clear_all():
    return None, None

def clear_output():
    return None

with gr.Blocks(css='''.example * {
    font-style: italic;
    font-size: 18px !important;
    color: #0ea5e9 !important;
    }''') as demo:

    gr.Markdown("### YOLOX Object Detection (OpenCV + ONNX)")
    gr.Markdown("Upload an image to detect objects using YOLOX ONNX model and OpenCV DNN.")

    with gr.Row():
        image_input = gr.Image(type="numpy", label="Upload Image")
        output_image = gr.Image(type="numpy", label="Detected Objects")

    # Clear the output image whenever a new input image is uploaded
    image_input.change(fn=clear_output, outputs=[output_image])

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear")

    submit_btn.click(fn=detect_objects, inputs=image_input, outputs=output_image)
    clear_btn.click(fn=clear_all, outputs=[image_input, output_image])

    gr.Markdown("Click on any example to try it.", elem_classes=["example"])

    gr.Examples(
        examples=[
            ["examples/left.jpg"],
            ["examples/messi5.jpg"]
        ],
        inputs=image_input
    )
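    # The example paths above assume an examples/ directory (left.jpg, messi5.jpg)
    # next to this script.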


if __name__ == "__main__":
    demo.launch()
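
# To run locally (assuming the OpenCV model zoo's yolox.py wrapper sits next to this
# script and the script is saved as app.py):
#   pip install opencv-python numpy gradio huggingface_hub
#   python app.py
# Gradio then prints a local URL where the demo can be opened in a browser.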