# Author: khaclinh
"""Gradio demo for PP4AV: face and license-plate detection with a YOLOX model."""
import importlib
import os
import subprocess
import sys

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image

# Install YOLOX at start-up, before it is imported below. Invoking pip through
# the running interpreter guarantees the package lands in this environment.
# Best-effort: if the install fails, the import below reports the problem.
subprocess.run([sys.executable, "-m", "pip", "install", "yolox"], check=False)

from yolox.utils import postprocess  # noqa: E402
from yolox.data.data_augment import ValTransform  # noqa: E402

ckpt_file = 'model_weights/best_ckpt.pth'

# get YOLOX experiment
current_exp = importlib.import_module('pp4av_exp')
exp = current_exp.Exp()

# set inference parameters
test_size = (800, 800)
num_classes = 2
nmsthre = 0.3

GDPR_CLASSES = (
    "Face",
    "Plate",
)

# Box/label colours in RGB order (drawing happens on an RGB array).
FACE_COLOR = (0, 255, 0)
OTHER_COLOR = (255, 0, 0)

# get YOLOX model (kept on CPU)
model = exp.get_model()
model.eval()

# get custom trained checkpoint
ckpt = torch.load(ckpt_file, map_location="cpu")
model.load_state_dict(ckpt["model"])


def yolox_inference(img, model, prob_threshold, test_size):
    """Run the detector on one image.

    Args:
        img: Image as a NumPy array; its first two dimensions are read as
            height and width. The only caller in this file passes BGR.
        model: Network called on the preprocessed tensor.
        prob_threshold: Confidence threshold forwarded to ``postprocess``.
        test_size: ``(height, width)`` the image is resized to for inference.

    Returns:
        ``(bboxes, bbclasses, scores)``. Boxes are rescaled back to the
        coordinates of ``img``. All three are empty lists when nothing is
        detected, otherwise tensors sliced from the post-processed output.
    """
    preproc = ValTransform(legacy=False)

    tensor_img, _ = preproc(img, None, test_size)
    tensor_img = torch.from_numpy(tensor_img).unsqueeze(0).float()

    with torch.no_grad():
        outputs = model(tensor_img)
        # num_classes and nmsthre are the module-level inference settings.
        outputs = postprocess(
            outputs, num_classes, prob_threshold, nmsthre, class_agnostic=True
        )

    detections = outputs[0]
    if detections is None:
        return [], [], []

    detections = detections.cpu()

    # Undo the resize applied by the preprocessing step: the same scale
    # factor (the smaller of the two axis ratios) maps boxes back to `img`.
    ratio = min(test_size[0] / img.shape[0], test_size[1] / img.shape[1])
    bboxes = detections[:, 0:4] / ratio

    bbclasses = detections[:, 6]
    # Final confidence is the product of columns 4 and 5.
    scores = detections[:, 4] * detections[:, 5]

    return bboxes, bbclasses, scores


def draw_yolox_predictions(img, bboxes, scores, bbclasses, prob_threshold, classes_dict):
    """Draw boxes and ``name:score%`` labels onto ``img`` in place.

    Args:
        img: Array to draw on; it is modified and also returned.
        bboxes: Sequence of boxes, each indexed as ``x0, y0, x1, y1``.
        scores: Confidence per box.
        bbclasses: Class id per box.
        prob_threshold: Boxes scoring below this value are skipped.
        classes_dict: Maps an integer class id to its display name.

    Returns:
        The same ``img`` object with the detections drawn.
    """
    for box, raw_cls, score in zip(bboxes, bbclasses, scores):
        if score < prob_threshold:
            continue

        cls_id = int(raw_cls)
        x0, y0, x1, y1 = (int(box[k]) for k in range(4))

        # Class 0 gets its own colour; every other class shares the second.
        color = FACE_COLOR if cls_id == 0 else OTHER_COLOR

        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
        cv2.putText(
            img,
            '{}:{:.1f}%'.format(classes_dict[cls_id], score * 100),
            (x0, y0 - 3),
            cv2.FONT_HERSHEY_PLAIN,
            0.8,
            color,
            thickness=1,
        )
    return img


def pp4av_detect(img, prob_threshold=0.1):
    """Detect faces and plates in an image and return an annotated copy.

    Args:
        img: Input image (a PIL image in this app). ``None`` is tolerated
            and yields ``None`` instead of an indexing error.
        prob_threshold: Minimum confidence for a detection to be drawn.

    Returns:
        A PIL image with the detections drawn, or ``None`` if ``img`` is
        ``None``.
    """
    if img is None:
        return None

    # The channel reversal below assumes exactly three RGB channels.
    if isinstance(img, Image.Image) and img.mode != "RGB":
        img = img.convert("RGB")

    # np.array() makes a fresh array, so drawing on it leaves `img` untouched.
    rgb_image = np.array(img)
    # The detector is fed BGR (OpenCV channel order).
    bgr_image = rgb_image[:, :, ::-1].copy()

    bboxes, bbclasses, scores = yolox_inference(
        bgr_image, model, prob_threshold, test_size
    )

    # Draw on the RGB array so the result can go straight back to PIL.
    out_image = draw_yolox_predictions(
        rgb_image, bboxes, scores, bbclasses, prob_threshold, GDPR_CLASSES
    )

    return Image.fromarray(out_image)


img_input = gr.Image(type='pil', label="Original Image")
img_output = gr.Image(type="pil", label="Output Image")

prob_threshold_slider = gr.Slider(
    minimum=0, maximum=1.0, step=0.01, value=0.1, label="Confidence Threshold"
)

title = "PP4AV: Deep Learning model for Data Anonymization in Autonomous Driving"
description = "Detecting faces and license plates in image data from self-driving cars. Take a picture, upload an image, or click an example image to use."
article = ""

examples = [
    ['data/fisheye.jpg'],
    ['data/zurich.jpg'],
    ['data/stuttgart.jpg'],
    ['data/strasbourg.jpg'],
]

# NOTE(review): `theme="huggingface"` and `launch(enable_queue=True)` are kept
# as in the original; confirm they are still accepted by the pinned Gradio
# version before upgrading.
demo = gr.Interface(
    fn=pp4av_detect,
    inputs=[img_input, prob_threshold_slider],
    outputs=img_output,
    title=title,
    description=description,
    article=article,
    examples=examples,
    theme="huggingface",
)
demo.launch(enable_queue=True)