Spaces:

fffiloni
/

Stand-In

Running on Zero

Stand-In / preprocessor /image_input_preprocessor.py

Migrated from GitHub

26557da verified about 2 months ago

6.24 kB

	import os
	import cv2
	import requests
	import torch
	import numpy as np
	import PIL.Image
	import PIL.ImageOps
	from insightface.app import FaceAnalysis
	from facexlib.parsing import init_parsing_model
	from torchvision.transforms.functional import normalize
	from typing import Union, Optional


	def _img2tensor(img: np.ndarray, bgr2rgb: bool = True) -> torch.Tensor:
	if bgr2rgb:
	img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	img = img.astype(np.float32) / 255.0
	img = np.transpose(img, (2, 0, 1))
	return torch.from_numpy(img)


	def _pad_to_square(img: np.ndarray, pad_color: int = 255) -> np.ndarray:
	h, w, _ = img.shape
	if h == w:
	return img

	if h > w:
	pad_size = (h - w) // 2
	padded_img = cv2.copyMakeBorder(
	img,
	0,
	0,
	pad_size,
	h - w - pad_size,
	cv2.BORDER_CONSTANT,
	value=[pad_color] * 3,
	)
	else:
	pad_size = (w - h) // 2
	padded_img = cv2.copyMakeBorder(
	img,
	pad_size,
	w - h - pad_size,
	0,
	0,
	cv2.BORDER_CONSTANT,
	value=[pad_color] * 3,
	)

	return padded_img


	class FaceProcessor:
	def __init__(self, antelopv2_path=".", device: Optional[torch.device] = None):
	if device is None:
	self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	else:
	self.device = device

	providers = (
	["CUDAExecutionProvider"]
	if self.device.type == "cuda"
	else ["CPUExecutionProvider"]
	)
	self.app = FaceAnalysis(
	name="antelopev2", root=antelopv2_path, providers=providers
	)
	self.app.prepare(ctx_id=0, det_size=(640, 640))

	self.parsing_model = init_parsing_model(
	model_name="bisenet", device=self.device
	)
	self.parsing_model.eval()

	print("FaceProcessor initialized successfully.")

	def process(
	self,
	image: Union[str, PIL.Image.Image],
	resize_to: int = 512,
	border_thresh: int = 10,
	face_crop_scale: float = 1.5,
	extra_input: bool = False,
	) -> PIL.Image.Image:
	if isinstance(image, str):
	if image.startswith("http://") or image.startswith("https://"):
	image = PIL.Image.open(requests.get(image, stream=True, timeout=10).raw)
	elif os.path.isfile(image):
	image = PIL.Image.open(image)
	else:
	raise ValueError(
	f"Input string is not a valid URL or file path: {image}"
	)
	elif not isinstance(image, PIL.Image.Image):
	raise TypeError(
	"Input must be a file path, a URL, or a PIL.Image.Image object."
	)

	image = PIL.ImageOps.exif_transpose(image).convert("RGB")

	frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

	faces = self.app.get(frame)
	h, w, _ = frame.shape
	image_to_process = None

	if not faces:
	print(
	"[Warning] No face detected. Using the whole image, padded to square."
	)
	image_to_process = _pad_to_square(frame, pad_color=255)
	else:
	largest_face = max(
	faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1])
	)
	x1, y1, x2, y2 = map(int, largest_face.bbox)

	is_close_to_border = (
	x1 <= border_thresh
	and y1 <= border_thresh
	and x2 >= w - border_thresh
	and y2 >= h - border_thresh
	)

	if is_close_to_border:
	print(
	"[Info] Face is close to border, padding original image to square."
	)
	image_to_process = _pad_to_square(frame, pad_color=255)
	else:
	cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
	side = int(max(x2 - x1, y2 - y1) * face_crop_scale)
	half = side // 2

	left = max(cx - half, 0)
	top = max(cy - half, 0)
	right = min(cx + half, w)
	bottom = min(cy + half, h)

	cropped_face = frame[top:bottom, left:right]
	image_to_process = _pad_to_square(cropped_face, pad_color=255)

	image_resized = cv2.resize(
	image_to_process, (resize_to, resize_to), interpolation=cv2.INTER_AREA
	)

	face_tensor = (
	_img2tensor(image_resized, bgr2rgb=True).unsqueeze(0).to(self.device)
	)
	with torch.no_grad():
	normalized_face = normalize(face_tensor, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
	parsing_out = self.parsing_model(normalized_face)[0]
	parsing_mask = parsing_out.argmax(dim=1, keepdim=True)

	background_mask_np = (parsing_mask.squeeze().cpu().numpy() == 0).astype(
	np.uint8
	)
	white_background = np.ones_like(image_resized, dtype=np.uint8) * 255
	mask_3channel = cv2.cvtColor(background_mask_np * 255, cv2.COLOR_GRAY2BGR)
	result_img_bgr = np.where(mask_3channel == 255, white_background, image_resized)
	result_img_rgb = cv2.cvtColor(result_img_bgr, cv2.COLOR_BGR2RGB)
	img_white_bg = PIL.Image.fromarray(result_img_rgb)
	if extra_input:
	# 2. Create image with transparent background (new logic)
	# Create an alpha channel: 255 for foreground (not background), 0 for background
	alpha_channel = (parsing_mask.squeeze().cpu().numpy() != 0).astype(
	np.uint8
	) * 255

	# Convert the resized BGR image to RGB
	image_resized_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

	# Stack RGB channels with the new alpha channel
	rgba_image = np.dstack((image_resized_rgb, alpha_channel))

	# Create PIL image from the RGBA numpy array
	img_transparent_bg = PIL.Image.fromarray(rgba_image, "RGBA")

	return img_white_bg, img_transparent_bg
	else:
	return img_white_bg