Spaces:

orpatashnik
/

NestedAttentionEncoder

Running on Zero

App Files Files Community

NestedAttentionEncoder / utils.py

orpatashnik

remove dlib dependency

399e621 9 days ago

raw

history blame

4.73 kB

	import numpy as np
	from PIL import Image
	import scipy.ndimage
	import insightface
	import torch
	import scipy

	# Initialize InsightFace model
	# Module-level face analyzer shared by get_landmark_pil_insight(). It is built
	# at import time, so importing this module triggers the model setup below.
	# name='buffalo_l' selects the model pack to load and `providers` limits
	# execution to the CPU execution provider.
	# NOTE(review): ctx_id=0 is passed even though only the CPU provider is
	# requested - confirm against the insightface docs that this is the intended
	# value for CPU-only use.
	face_analyzer = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
	face_analyzer.prepare(ctx_id=0)


	def image_grid(imgs, rows, cols):
	    """Tile images into a single ``rows`` x ``cols`` RGB contact sheet.

	    The cell size is taken from the first image; images are pasted in
	    row-major order (left to right, then top to bottom).

	    Args:
	        imgs: Sequence of PIL images; its length must equal ``rows * cols``.
	        rows: Number of rows in the grid.
	        cols: Number of columns in the grid.

	    Returns:
	        A new RGB ``PIL.Image`` of size ``(cols * w, rows * h)`` where
	        ``(w, h)`` is the size of ``imgs[0]``.

	    Raises:
	        AssertionError: If ``len(imgs) != rows * cols``.
	    """
	    assert len(imgs) == rows * cols

	    w, h = imgs[0].size
	    grid = Image.new('RGB', size=(cols * w, rows * h))

	    for i, img in enumerate(imgs):
	        # Row-major placement: column index advances fastest.
	        row, col = divmod(i, cols)
	        grid.paste(img, box=(col * w, row * h))
	    return grid


	def get_generator(seed, device):
	    """Build seeded ``torch.Generator`` object(s) on ``device``.

	    Args:
	        seed: ``None``, a single seed, or a list of seeds.
	        device: Device passed to ``torch.Generator``.

	    Returns:
	        ``None`` when ``seed`` is ``None``; a list with one seeded generator
	        per entry when ``seed`` is a list; otherwise one seeded generator.
	    """
	    # No seed requested: the caller gets no generator at all.
	    if seed is None:
	        return None

	    # A scalar seed yields exactly one generator.
	    if not isinstance(seed, list):
	        return torch.Generator(device).manual_seed(seed)

	    # A list of seeds yields one independent generator per seed, in order.
	    generators = []
	    for single_seed in seed:
	        generators.append(torch.Generator(device).manual_seed(single_seed))
	    return generators

	def get_landmark_pil_insight(pil_image):
	    """Return the keypoints of the first face InsightFace detects.

	    The image is converted to RGB, run through the module-level
	    ``face_analyzer`` and the ``kps`` attribute of the first returned face is
	    handed back. These are the detector's sparse keypoints, expected in the
	    order ``[left_eye, right_eye, nose, left_mouth, right_mouth]`` - not a
	    dense 68-point landmark set.

	    Args:
	        pil_image: Input ``PIL.Image``.

	    Returns:
	        A new ``numpy.ndarray`` of keypoint coordinates (one ``(x, y)`` row
	        per point), or ``None`` when no face or no keypoints are available.
	    """
	    # NOTE(review): the array handed to the analyzer is in RGB channel order;
	    # InsightFace examples feed cv2-style BGR images - confirm whether the
	    # channels should be reversed here.
	    img_np = np.array(pil_image.convert("RGB"))
	    faces = face_analyzer.get(img_np)
	    if not faces:
	        return None

	    # Only the first returned face is used; any others are ignored.
	    keypoints = faces[0].kps
	    if keypoints is None:
	        return None

	    # Copy so callers never alias the analyzer's own result buffer.
	    return np.array(keypoints)

	def _quad_from_landmarks(lm):
	    """Return the oriented crop quad (4x2 array) and its side length for 5-point landmarks."""
	    eye_left, eye_right = lm[0], lm[1]
	    eye_avg = (eye_left + eye_right) * 0.5
	    eye_to_eye = eye_right - eye_left
	    mouth_left, mouth_right = lm[3], lm[4]
	    mouth_avg = (mouth_left + mouth_right) * 0.5
	    eye_to_mouth = mouth_avg - eye_avg

	    # Horizontal axis of the crop: the eye line blended with the direction
	    # perpendicular to the eye->mouth vector, normalised and then scaled to
	    # the larger of the two face extents.
	    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
	    x /= np.hypot(*x)
	    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
	    # Vertical axis: x rotated by 90 degrees.
	    y = np.flipud(x) * [-1, 1]
	    # Crop centre sits slightly below the eye midpoint.
	    c = eye_avg + eye_to_mouth * 0.1
	    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
	    qsize = np.hypot(*x) * 2
	    return quad, qsize


	def align_face(pil_image, *, output_size=512, transform_size=512, enable_padding=True):
	    """Crop and rotate a face so that the eyes and mouth are in a canonical position.

	    Landmarks come from ``get_landmark_pil_insight`` and are read as
	    ``[left_eye, right_eye, nose, left_mouth, right_mouth]``. An oriented
	    square around the face is computed from them, the image is optionally
	    shrunk, cropped and reflect-padded, and the square is finally warped to
	    ``transform_size`` and resized down to ``output_size`` if smaller.

	    Args:
	        pil_image: Input ``PIL.Image`` containing a face.
	        output_size: Side length of the returned image (default 512).
	        transform_size: Side length used for the quad warp (default 512).
	        enable_padding: If true, pad with a blurred reflection when the crop
	            square extends beyond the image borders.

	    Returns:
	        The aligned RGB ``PIL.Image``. When no face (or fewer than five
	        keypoints) is found, or the landmarks are degenerate, the input image
	        is returned unchanged - it is NOT resized in that case.
	    """
	    lm = get_landmark_pil_insight(pil_image)
	    if lm is None or lm.shape[0] < 5:
	        return pil_image

	    quad, qsize = _quad_from_landmarks(lm)
	    # Coincident landmarks give a zero-length axis (NaN/zero size); fall back
	    # to the unaligned input exactly as in the no-face case.
	    if not np.isfinite(qsize) or qsize <= 0:
	        return pil_image

	    img = pil_image.convert("RGB")

	    # Shrink very large inputs first so the later filtering stays cheap.
	    shrink = int(np.floor(qsize / output_size * 0.5))
	    if shrink > 1:
	        rsize = (int(np.rint(img.size[0] / shrink)), int(np.rint(img.size[1] / shrink)))
	        img = img.resize(rsize, Image.Resampling.LANCZOS)
	        quad /= shrink
	        qsize /= shrink

	    # Crop to the quad's bounding box plus a small border, clamped to the image.
	    border = max(int(np.rint(qsize * 0.1)), 3)
	    crop = (
	        int(np.floor(min(quad[:, 0]))),
	        int(np.floor(min(quad[:, 1]))),
	        int(np.ceil(max(quad[:, 0]))),
	        int(np.ceil(max(quad[:, 1]))),
	    )
	    crop = (
	        max(crop[0] - border, 0),
	        max(crop[1] - border, 0),
	        min(crop[2] + border, img.size[0]),
	        min(crop[3] + border, img.size[1]),
	    )
	    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
	        img = img.crop(crop)
	        quad -= crop[:2]

	    # Work out how far the quad sticks out of the (cropped) image on each side.
	    pad = (
	        int(np.floor(min(quad[:, 0]))),
	        int(np.floor(min(quad[:, 1]))),
	        int(np.ceil(max(quad[:, 0]))),
	        int(np.ceil(max(quad[:, 1]))),
	    )
	    pad = (
	        max(-pad[0] + border, 0),
	        max(-pad[1] + border, 0),
	        max(pad[2] - img.size[0] + border, 0),
	        max(pad[3] - img.size[1] + border, 0),
	    )
	    if enable_padding and max(pad) > border - 4:
	        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
	        arr = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
	        h, w, _ = arr.shape
	        yy, xx, _ = np.ogrid[:h, :w, :1]
	        # mask is 1 at the outer edge of the padded area and falls to 0 (or
	        # below) towards the original image content.
	        mask = np.maximum(
	            1.0 - np.minimum(np.float32(xx) / pad[0], np.float32(w - 1 - xx) / pad[2]),
	            1.0 - np.minimum(np.float32(yy) / pad[1], np.float32(h - 1 - yy) / pad[3]),
	        )
	        blur = qsize * 0.02
	        # Blend towards a blurred copy, then towards the median colour, so the
	        # reflected padding fades out instead of showing mirrored detail.
	        arr += (scipy.ndimage.gaussian_filter(arr, [blur, blur, 0]) - arr) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
	        arr += (np.median(arr, axis=(0, 1)) - arr) * np.clip(mask, 0.0, 1.0)
	        # A uint8 HxWx3 array is interpreted as RGB without an explicit mode.
	        img = Image.fromarray(np.uint8(np.clip(np.rint(arr), 0, 255)))
	        quad += pad[:2]

	    # Warp the oriented quad onto an axis-aligned square.
	    img = img.transform(
	        (transform_size, transform_size),
	        Image.Transform.QUAD,
	        (quad + 0.5).flatten(),
	        Image.Resampling.BILINEAR,
	    )
	    if output_size < transform_size:
	        img = img.resize((output_size, output_size), Image.Resampling.LANCZOS)

	    return img