from typing import Tuple

import numpy as np
import torch
from PIL import Image

import sys
import os

# Make the bundled mtcnn_pytorch package importable regardless of the caller's cwd.
sys.path.insert(0, os.path.dirname(__file__))

from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
from mtcnn_pytorch.src.first_stage import run_first_stage
from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
class MTCNN:
    def __init__(self, device: str = 'cuda:0', crop_size: Tuple[int, int] = (112, 112)):
        assert device in ['cuda:0', 'cpu']
        self.device = torch.device(device)
        assert crop_size in [(112, 112), (96, 112)]
        self.crop_size = crop_size

        # Change the working directory to this file's location so the nets can
        # load their weight files, then switch back.
        cwd = os.getcwd()
        os.chdir(os.path.dirname(__file__))

        self.pnet = PNet().to(self.device)
        self.rnet = RNet().to(self.device)
        self.onet = ONet().to(self.device)
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        self.reference = get_reference_facial_points(default_square=crop_size[0] == crop_size[1])

        self.min_face_size = 20
        self.thresholds = [0.6, 0.7, 0.9]
        self.nms_thresholds = [0.7, 0.7, 0.7]
        self.factor = 0.85

        os.chdir(cwd)
    def align(self, img):
        _, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
        # Landmarks come back as [x1..x5, y1..y5]; regroup them into (x, y) pairs.
        facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
        warped_face = warp_and_crop_face(np.array(img), facial5points, self.reference, crop_size=self.crop_size)
        return Image.fromarray(warped_face)
    def align_multi(self, img, limit=None):
        boxes, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
        if limit is not None:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]
        faces = []
        for landmark in landmarks:
            facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
            warped_face = warp_and_crop_face(np.array(img), facial5points, self.reference, crop_size=self.crop_size)
            faces.append(Image.fromarray(warped_face))
        return boxes, faces
    def detect_faces(self, image, min_face_size, thresholds, nms_thresholds, factor):
        """
        Arguments:
            image: an instance of PIL.Image.
            min_face_size: a float, the smallest face size (in pixels) to detect.
            thresholds: a list of length 3, confidence thresholds for P-Net, R-Net, and O-Net.
            nms_thresholds: a list of length 3, IoU thresholds for NMS after each stage.
            factor: a float, the downscaling factor between image pyramid levels.

        Returns:
            two float numpy arrays of shapes [n_boxes, 5] and [n_boxes, 10]:
            bounding boxes (with confidence scores) and facial landmarks.
            If no faces are found, two empty lists are returned.
        """
        # BUILD AN IMAGE PYRAMID
        width, height = image.size
        min_length = min(height, width)

        min_detection_size = 12

        # Scales for the image pyramid. Scaling by m makes the smallest window
        # P-Net can detect (12 px) correspond to min_face_size in the original
        # image; every further level shrinks the image by `factor`.
        scales = []
        m = min_detection_size / min_face_size
        min_length *= m

        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor**factor_count)
            min_length *= factor
            factor_count += 1
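        # Worked example (illustrative numbers, not from the original source):
        # with min_face_size=20, m = 12/20 = 0.6. On a 1000x750 image the
        # scales are 0.6, 0.6*0.85, 0.6*0.85**2, ... and the loop stops once
        # 750*0.6*0.85**k drops below 12 px, i.e. about 23 pyramid levels.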
        # STAGE 1
        # it will be returned
        bounding_boxes = []

        with torch.no_grad():
            # run P-Net on different scales
            for s in scales:
                boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
                bounding_boxes.append(boxes)

            # collect boxes (and offsets, and scores) from different scales
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            if len(bounding_boxes) == 0:
                return [], []
            bounding_boxes = np.vstack(bounding_boxes)

            keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
            bounding_boxes = bounding_boxes[keep]

            # use offsets predicted by P-Net to transform bounding boxes,
            # then make them square for the next crop
            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
            # shape [n_boxes, 5]
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
            # STAGE 2
            img_boxes = get_image_boxes(bounding_boxes, image, size=24)
            if len(img_boxes) == 0:
                return [], []
            img_boxes = torch.FloatTensor(img_boxes).to(self.device)
            output = self.rnet(img_boxes)
            offsets = output[0].cpu().data.numpy()  # shape [n_boxes, 4]
            probs = output[1].cpu().data.numpy()    # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[1])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]

            keep = nms(bounding_boxes, nms_thresholds[1])
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
            # STAGE 3
            img_boxes = get_image_boxes(bounding_boxes, image, size=48)
            if len(img_boxes) == 0:
                return [], []
            img_boxes = torch.FloatTensor(img_boxes).to(self.device)
            output = self.onet(img_boxes)
            landmarks = output[0].cpu().data.numpy()  # shape [n_boxes, 10]
            offsets = output[1].cpu().data.numpy()    # shape [n_boxes, 4]
            probs = output[2].cpu().data.numpy()      # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[2])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
            offsets = offsets[keep]
            landmarks = landmarks[keep]
            # compute landmark points: O-Net predicts landmarks normalized to
            # [0, 1] within each box, so map them back to image coordinates
            width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
            height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
            xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
            landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
            landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
            bounding_boxes = calibrate_box(bounding_boxes, offsets)
            keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
            bounding_boxes = bounding_boxes[keep]
            landmarks = landmarks[keep]

        return bounding_boxes, landmarks
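

# A minimal usage sketch (not part of the original module); the input path and
# output filenames below are hypothetical placeholders.
if __name__ == '__main__':
    mtcnn = MTCNN(device='cpu')  # or 'cuda:0' if a GPU is available
    img = Image.open('sample.jpg')  # hypothetical input image
    boxes, faces = mtcnn.align_multi(img, limit=4)
    print(f'detected {len(faces)} face(s)')
    for i, face in enumerate(faces):
        face.save(f'face_{i}.jpg')  # each face is a PIL.Image of crop_size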