File size: 3,914 Bytes
dd090c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Author: khaclinh
import os
import subprocess
import sys

# Install YOLOX at startup, before the `yolox` imports below are executed.
# `sys.executable -m pip` guarantees the package lands in the environment of
# the interpreter that is running this script (a bare `pip` on PATH may belong
# to a different Python), and the argument list avoids spawning a shell.
# check=False keeps the step best-effort: if the install fails but YOLOX is
# already present, startup continues; otherwise the import below will raise.
subprocess.run([sys.executable, "-m", "pip", "install", "yolox"], check=False)

import gradio as gr
import torch
import numpy as np
from PIL import Image
import importlib

import cv2 

from yolox.utils import postprocess
from yolox.data.data_augment import ValTransform

# Path to the trained weights, resolved relative to the working directory.
ckpt_file = 'model_weights/best_ckpt.pth'

# get YOLOX experiment
# The experiment definition is imported by module name, so a module called
# `pp4av_exp` exposing an `Exp` class must be importable from here.
current_exp = importlib.import_module('pp4av_exp')
exp = current_exp.Exp()

# set inference parameters
# test_size is passed by pp4av_detect to yolox_inference; num_classes and
# nmsthre are read as globals by yolox_inference when it calls postprocess.
test_size = (800, 800)
num_classes = 2
nmsthre = 0.3

# Display names for the detector's class ids; draw_yolox_predictions indexes
# this tuple with the integer class id (0 -> "Face", 1 -> "Plate").
GDPR_CLASSES = (
    "Face",
    "Plate"
)


# get YOLOX model
model = exp.get_model()
# GPU placement is intentionally left disabled; the checkpoint below is also
# mapped to CPU, so inference runs on CPU.
#model.cuda()
# Switch to evaluation mode before serving predictions.
model.eval()

# get custom trained checkpoint
# NOTE(review): torch.load unpickles the file, so only trusted checkpoints
# should ever be placed at ckpt_file.
# The weights are expected under the "model" key of the checkpoint.
ckpt = torch.load(ckpt_file, map_location="cpu")
model.load_state_dict(ckpt["model"])


def yolox_inference(img, model, prob_threshold, test_size):
    """Run the detector on a single image and return its detections.

    Args:
        img: Image array handed to the YOLOX ``ValTransform``; its
            ``shape[0]`` / ``shape[1]`` are used as height / width when the
            boxes are scaled back (the caller in this file passes a BGR
            H x W x 3 array).
        model: Callable network that takes the batched float tensor.
        prob_threshold: Confidence threshold forwarded to ``postprocess``.
        test_size: ``(height, width)`` the image is resized to for inference.

    Returns:
        A ``(bboxes, bbclasses, scores)`` triple. When ``postprocess`` reports
        no detections, three empty lists are returned. Otherwise ``bboxes``
        holds columns 0-3 of the detections divided by the resize ratio,
        ``bbclasses`` is column 6 and ``scores`` is column 4 times column 5.
    """
    transform = ValTransform(legacy=False)
    resized, _ = transform(img, None, test_size)

    # Add the batch dimension and make sure the network sees float32 input.
    batch = torch.from_numpy(resized).unsqueeze(0).float()

    with torch.no_grad():
        raw = model(batch)
        # num_classes and nmsthre are the module-level inference settings.
        per_image = postprocess(
            raw, num_classes, prob_threshold, nmsthre, class_agnostic=True
        )

    detections = per_image[0]
    if detections is None:
        return [], [], []

    detections = detections.cpu()

    # Ratio between the inference size and the source image; dividing by it
    # expresses the boxes in source-image pixels again.
    ratio = min(test_size[0] / img.shape[0], test_size[1] / img.shape[1])
    boxes = detections[:, 0:4]
    boxes /= ratio

    class_ids = detections[:, 6]
    confidences = detections[:, 4] * detections[:, 5]

    return boxes, class_ids, confidences


def draw_yolox_predictions(img, bboxes, scores, bbclasses, prob_threshold, classes_dict):
    """Draw a labelled rectangle on ``img`` for each confident detection.

    Args:
        img: Image array that OpenCV draws on; it is modified in place.
        bboxes: Sequence of boxes, each indexable as ``x0, y0, x1, y1``.
        scores: Per-box confidence, aligned with ``bboxes``.
        bbclasses: Per-box class id, aligned with ``bboxes``.
        prob_threshold: Boxes scoring below this value are skipped.
        classes_dict: Lookup from integer class id to its display name.

    Returns:
        The same ``img`` object that was passed in.
    """
    for idx, box in enumerate(bboxes):
        cls_id = int(bbclasses[idx])
        score = scores[idx]
        if score < prob_threshold:
            continue

        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))

        # Class 0 is drawn with (0, 255, 0); every other class with (255, 0, 0).
        color = (0, 255, 0) if cls_id == 0 else (255, 0, 0)

        cv2.rectangle(img, top_left, bottom_right, color, 2)
        caption = '{}:{:.1f}%'.format(classes_dict[cls_id], score * 100)
        # The caption sits 3 px above the top-left corner of the box.
        cv2.putText(
            img,
            caption,
            (top_left[0], top_left[1] - 3),
            cv2.FONT_HERSHEY_PLAIN,
            0.8,
            color,
            thickness=1,
        )

    return img


def pp4av_detect(img, prob_threshold=0.1):
    """Detect faces and licence plates in ``img`` and return an annotated copy.

    Args:
        img: Input picture, normally a PIL image supplied by the Gradio
            image component. ``None`` (nothing uploaded) is accepted.
        prob_threshold: Minimum confidence for a detection to be kept and
            drawn.

    Returns:
        A new PIL image with the detections drawn on it, or ``None`` when no
        input image was provided.
    """
    # A submit without an image reaches the callback as None; return an empty
    # result instead of failing while indexing the array.
    if img is None:
        return None

    # Palette, grayscale or RGBA pictures would not yield the H x W x 3 array
    # the channel flip below relies on, so normalise PIL inputs to RGB first.
    if hasattr(img, "convert") and getattr(img, "mode", "RGB") != "RGB":
        img = img.convert("RGB")

    # np.array makes a fresh, writable RGB copy that is safe to draw on.
    rgb_image = np.array(img)

    # The detector is fed BGR channel order; the copy makes the reversed view
    # contiguous.
    bgr_image = rgb_image[:, :, ::-1].copy()

    bboxes, bbclasses, scores = yolox_inference(bgr_image, model, prob_threshold, test_size)

    # Draw straight onto the RGB copy; converting the BGR array back to RGB
    # would only reproduce this same array.
    out_image = draw_yolox_predictions(
        rgb_image, bboxes, scores, bbclasses, prob_threshold, GDPR_CLASSES
    )

    return Image.fromarray(out_image)


# Text shown on the demo page.
title = "PP4AV: Deep Learning model for Data Anonymization in Autonomous Driving"
description = "Detecting faces and license plates in image data from self-driving cars. Take a picture, upload an image, or click an example image to use."
article = ""

# Sample images offered as one-click examples (one input value per row).
examples = [
    ['data/fisheye.jpg'],
    ['data/zurich.jpg'],
    ['data/stuttgart.jpg'],
    ['data/strasbourg.jpg'],
]

# Input and output widgets: the picture is exchanged as a PIL image and the
# slider supplies the confidence threshold passed to pp4av_detect.
img_input = gr.inputs.Image(type='pil', label="Original Image")
img_output = gr.outputs.Image(type="pil", label="Output Image")
prob_threshold_slider = gr.Slider(
    minimum=0,
    maximum=1.0,
    step=0.01,
    value=0.1,
    label="Confidence Threshold",
)

# Build the interface around pp4av_detect and start serving it with request
# queueing enabled.
gr.Interface(
    fn=pp4av_detect,
    inputs=[img_input, prob_threshold_slider],
    outputs=img_output,
    title=title,
    description=description,
    article=article,
    examples=examples,
    theme="huggingface",
).launch(enable_queue=True)