Spaces:

stepfun-ai
/

NextStep-1-Large-Edit

Running on Zero

App Files Files Community

NextStep-1-Large-Edit / utils /image_utils.py

jingwwu

Upload folder using huggingface_hub

efe4293 verified 8 days ago

raw

history blame contribute delete

11.2 kB

	import io
	import os
	from typing import Literal, TypeAlias

	import numpy as np
	import PIL.Image
	import PIL.ImageOps
	import requests
	import torch

	"""
	- pil: `PIL.Image.Image`, size (w, h), converts seamlessly to and from `uint8` arrays
	- np: `np.ndarray`, shape (h, w, c), default `np.uint8`
	- pt: `torch.Tensor`, shape (c, h, w), default `torch.uint8`
	"""
	# Any of the three image containers this module converts between.
	ImageType: TypeAlias = PIL.Image.Image | np.ndarray | torch.Tensor
	# Short names for the containers: PIL image, NumPy array, PyTorch tensor.
	ImageTypeStr: TypeAlias = Literal["pil", "np", "pt"]
	ImageFormat: TypeAlias = Literal["JPEG", "PNG"]
	# Value-range conventions used by `to_dataformat`:
	# "255" -> uint8 in [0, 255], "01" -> float in [0, 1], "11" -> float in [-1, 1].
	DataFormat: TypeAlias = Literal["255", "01", "11"]


	# PIL modes that `to_rgb` accepts.
	IMG_SUPPORT_MODE = ["L", "LA", "RGB", "RGBA", "CMYK", "P", "1"]
	# File extensions, lower-case first and then the upper-case variants.
	IMAGE_EXT_LOWER = ["png", "jpeg", "jpg", "webp"]
	IMAGE_EXT = IMAGE_EXT_LOWER + [_ext.upper() for _ext in IMAGE_EXT_LOWER]


	def check_image_type(image: ImageType):
	    """
	    Validate that `image` is one of the supported image containers.

	    Raises:
	        TypeError: If `image` is not a PIL image, a NumPy array or a PyTorch tensor.
	    """
	    supported = (PIL.Image.Image, np.ndarray, torch.Tensor)
	    if isinstance(image, supported):
	        return
	    raise TypeError(f"`image` should be PIL Image, ndarray or Tensor. Got `{type(image)}`.")


	def to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
	    """
	    Convert a PIL image to mode `RGB`, flattening any transparency onto white.

	    Args:
	        image (PIL.Image.Image): The source image. Its mode must be listed in
	            `IMG_SUPPORT_MODE`.

	    Returns:
	        PIL.Image.Image: An `RGB` image with the EXIF orientation applied.

	    Raises:
	        ValueError: If the image mode is not listed in `IMG_SUPPORT_MODE`.
	    """
	    # Automatically adjust the orientation of the image to match the direction it was taken.
	    image = PIL.ImageOps.exif_transpose(image)

	    if image.mode not in IMG_SUPPORT_MODE:
	        raise ValueError(f"Only support mode in `{IMG_SUPPORT_MODE}`, got `{image.mode}`")

	    # Expand every mode that carries transparency to RGBA so it is composited
	    # below instead of being dropped. Palette images store their transparency
	    # in `info` rather than in the mode name.
	    if image.mode == "LA" or (image.mode == "P" and "transparency" in image.info):
	        image = image.convert("RGBA")

	    # add white background for RGBA images
	    if image.mode == "RGBA":
	        background = PIL.Image.new("RGBA", image.size, "white")
	        image = PIL.Image.alpha_composite(background, image)

	    return image.convert("RGB")


	def load_image(
	    image: str | os.PathLike | PIL.Image.Image | bytes,
	    *,
	    output_type: ImageTypeStr = "pil",
	) -> ImageType:
	    """
	    Loads `image` as an RGB PIL Image, NumPy array or PyTorch tensor.

	    Args:
	        image (str | os.PathLike | PIL.Image.Image | bytes): A local file path,
	            an `http://` / `https://` URL, an already opened PIL Image, or the
	            encoded bytes of an image file.
	        output_type (ImageTypeStr, optional): The type of the output image. Defaults to "pil".
	            The current version supports "pil", "np", "pt".

	    Returns:
	        ImageType: The loaded image, converted to RGB, in the given type.

	    Raises:
	        ValueError: If the HTTP request times out, the path does not exist,
	            `image` has an unsupported type, or `output_type` is unknown.
	    """
	    timeout = 10
	    # Load the `image` into a PIL Image.
	    if isinstance(image, (str, os.PathLike)):
	        # `os.PathLike` objects have no `startswith`; work on the string form.
	        source = os.fsdecode(image)
	        if source.startswith(("http://", "https://")):
	            try:
	                # Download the whole body: `response.content` is content-decoded
	                # and the connection is released, unlike the streamed `.raw`.
	                response = requests.get(source, timeout=timeout)
	            except requests.exceptions.Timeout as err:
	                raise ValueError(f"HTTP request timed out after {timeout} seconds") from err
	            image = PIL.Image.open(io.BytesIO(response.content))
	        elif os.path.isfile(source):
	            image = PIL.Image.open(source)
	        else:
	            raise ValueError(
	                f"Incorrect path or url, URLs must start with `http://` or `https://`, and `{source}` is not a valid path."
	            )
	    elif isinstance(image, bytes):
	        image = PIL.Image.open(io.BytesIO(image))
	    elif not isinstance(image, PIL.Image.Image):
	        raise ValueError(f"`image` must be a path, URL, bytes or PIL Image, got `{type(image)}`")

	    image = to_rgb(image)

	    if output_type == "pil":
	        return image
	    if output_type == "np":
	        return to_np(image)
	    if output_type == "pt":
	        return to_pt(image)
	    raise ValueError(f"`output_type` must be one of `{ImageTypeStr}`, got `{output_type}`")


	def to_pil(image: ImageType, image_mode: DataFormat | None = None) -> PIL.Image.Image:
	    """
	    Convert a NumPy array or a PyTorch tensor to a PIL image.

	    Args:
	        image (ImageType): The image to convert. A PIL image is returned unchanged.
	        image_mode (DataFormat | None, optional): Value range of `image`, passed
	            on to `normalize_np` / `normalize_pt`. Defaults to None.

	    Returns:
	        PIL.Image.Image: An `L` image for 1-channel input, `RGB` for 3-channel input.

	    Raises:
	        TypeError: If `image` is not a PIL image, ndarray or tensor.
	    """
	    check_image_type(image)

	    if isinstance(image, PIL.Image.Image):
	        return image

	    elif isinstance(image, np.ndarray):
	        image = normalize_np(image, image_mode)

	    elif isinstance(image, torch.Tensor):
	        image = normalize_pt(image, image_mode)

	        # (c, h, w) tensor -> (h, w, c) array
	        image = image.cpu().permute(1, 2, 0).numpy()
	        assert image.dtype == np.uint8, f"Supposed to convert `torch.uint8` to `np.uint8`, but got `{image.dtype}`"

	    mode_map = {1: "L", 3: "RGB"}
	    mode = mode_map[image.shape[-1]]

	    # PIL expects a 2D array for single-channel images.
	    if image.shape[-1] == 1:
	        image = image[:, :, 0]

	    return PIL.Image.fromarray(image, mode=mode)


	def to_np(image: ImageType, image_mode: DataFormat | None = None) -> np.ndarray:
	    """
	    Convert a PIL image or a PyTorch tensor to a NumPy array.

	    Args:
	        image (ImageType): The image to convert.
	        image_mode (DataFormat | None, optional): Value range of `image`, passed
	            on to `normalize_np` / `normalize_pt`. Defaults to None.

	    Returns:
	        np.ndarray: The image as returned by `normalize_np`, or the normalized
	            tensor permuted to (h, w, c).

	    Raises:
	        TypeError: If `image` is not a PIL image, ndarray or tensor.
	    """
	    check_image_type(image)

	    if isinstance(image, PIL.Image.Image):
	        image = np.array(image, np.uint8, copy=True)

	    # Deliberately `if`, not `elif`: a PIL image converted above is normalized here too.
	    if isinstance(image, np.ndarray):
	        image = normalize_np(image, image_mode)

	    elif isinstance(image, torch.Tensor):
	        image = normalize_pt(image, image_mode)

	        # (c, h, w) tensor -> (h, w, c) array
	        image = image.cpu().permute(1, 2, 0).numpy()
	        assert image.dtype == np.uint8, f"Supposed to convert `torch.uint8` to `np.uint8`, but got `{image.dtype}`"

	    return image


	def to_pt(image: ImageType, image_mode: DataFormat | None = None) -> torch.Tensor:
	    """
	    Convert a PIL image or a NumPy array to a PyTorch tensor.

	    Args:
	        image (ImageType): The image to convert. A tensor is only normalized.
	        image_mode (DataFormat | None, optional): Value range of `image`, passed
	            on to `normalize_np` / `normalize_pt`. Defaults to None.

	    Returns:
	        torch.Tensor: The image as returned by `normalize_pt`, or the normalized
	            array transposed to (c, h, w).

	    Raises:
	        TypeError: If `image` is not a PIL image, ndarray or tensor.
	    """
	    check_image_type(image)

	    if isinstance(image, torch.Tensor):
	        return normalize_pt(image, image_mode)

	    # convert PIL Image to NumPy array
	    if isinstance(image, PIL.Image.Image):
	        image = np.array(image, np.uint8, copy=True)

	    image = normalize_np(image, image_mode)

	    # (h, w, c) array -> (c, h, w) tensor
	    image = torch.from_numpy(image.transpose((2, 0, 1))).contiguous()
	    assert image.dtype == torch.uint8, f"Supposed to convert `np.uint8` to `torch.uint8`, but got `{image.dtype}`"
	    return image


	def normalize_np(image: np.ndarray, image_mode: DataFormat | None = None) -> np.ndarray:
	    """
	    Normalize a NumPy array to the standard format of shape (h, w, c) and uint8.

	    Args:
	        image (np.ndarray): A 2D (h, w) or 3D (h, w, c) array with 1 or 3 channels.
	        image_mode (DataFormat | None, optional): Value range of `image`, passed
	            on to `to_dataformat`. Defaults to None.

	    Returns:
	        np.ndarray: The (h, w, c) array converted to the "255" data format.

	    Raises:
	        ValueError: If `image` is not 2D/3D or does not have 1 or 3 channels.
	    """
	    if image.ndim not in {2, 3}:
	        raise ValueError(f"`image` should be 2 or 3 dimensions. Got {image.ndim} dimensions.")

	    if image.ndim == 2:
	        # if 2D image, add channel dimension (HWC)
	        image = np.expand_dims(image, 2)

	    if image.shape[-1] not in {1, 3}:
	        raise ValueError(f"`image` should have 1 (`L`) or 3 (`RGB`) channels. Got {image.shape[-1]} channels.")

	    return to_dataformat(image, image_mode=image_mode, mode="255")


	def normalize_pt(image: torch.Tensor, image_mode: DataFormat | None = None) -> torch.Tensor:
	    """
	    Normalize a PyTorch tensor to the standard format of shape (c, h, w) and uint8.

	    Args:
	        image (torch.Tensor): A 2D (h, w) or 3D (c, h, w) tensor with 1 or 3 channels.
	        image_mode (DataFormat | None, optional): Value range of `image`, passed
	            on to `to_dataformat`. Defaults to None.

	    Returns:
	        torch.Tensor: The (c, h, w) tensor converted to the "255" data format.

	    Raises:
	        ValueError: If `image` is not 2D/3D or does not have 1 or 3 channels.
	    """
	    if image.ndimension() not in {2, 3}:
	        raise ValueError(f"`image` should be 2 or 3 dimensions. Got {image.ndimension()} dimensions.")

	    if image.ndimension() == 2:
	        # if 2D image, add channel dimension (CHW)
	        image = image.unsqueeze(0)

	    # check number of channels
	    if image.shape[-3] not in {1, 3}:
	        raise ValueError(f"`image` should have 1 (`L`) or 3 (`RGB`) channels. Got {image.shape[-3]} channels.")

	    return to_dataformat(image, image_mode=image_mode, mode="255")


	def to_dataformat(
	    image: ImageType,
	    *,
	    image_mode: DataFormat | None = None,
	    mode: DataFormat = "255",
	) -> np.ndarray | torch.Tensor:
	    """
	    Convert the value range of an image between the "255", "01" and "11" formats.

	    "255" is uint8 in [0, 255], "01" is float in [0, 1] and "11" is float in
	    [-1, 1]. The input array or tensor is never modified in place.

	    Args:
	        image (ImageType): The image to convert. A PIL image is first turned
	            into a uint8 array and treated as "255".
	        image_mode (DataFormat | None, optional): Value range of `image`. When
	            None it is guessed from the dtype and the minimum value; when given
	            and different from the guess, a notice is printed and the given
	            value is used.
	        mode (DataFormat, optional): The target value range. Defaults to "255".

	    Returns:
	        np.ndarray | torch.Tensor: The converted image, or `image` itself when
	            no conversion is needed.

	    Raises:
	        TypeError: If `image` is not a PIL image, ndarray or tensor.
	        ValueError: If the dtype is not uint8, float16 or float32.
	    """
	    if not isinstance(image, (np.ndarray, torch.Tensor)):
	        # Anything that is not an array or tensor must be a PIL image;
	        # `check_image_type` raises for every other type.
	        check_image_type(image)
	        image = np.array(image, np.uint8, copy=True)
	        image_mode = "255"

	    is_numpy = isinstance(image, np.ndarray)
	    uint8_dtype = np.uint8 if is_numpy else torch.uint8
	    float_dtypes = (np.float16, np.float32) if is_numpy else (torch.float16, torch.float32)

	    # guess image mode
	    if image.dtype == uint8_dtype:
	        guess_image_mode = "255"
	    elif image.dtype in float_dtypes:
	        guess_image_mode = "11" if image.min() < 0.0 else "01"
	    else:
	        raise ValueError(f"Unsupported dtype `{image.dtype}`")

	    if image_mode is None:
	        image_mode = guess_image_mode
	    elif guess_image_mode != image_mode:
	        print(f"Guess image mode is `{guess_image_mode}`, but image mode is `{image_mode}`")

	    if is_numpy:
	        # Every step builds a new array: writing through `out=image` would fail
	        # for uint8 input and would overwrite the caller's data for float input.
	        if image_mode == "255" and mode != "255":
	            image = np.clip(image.astype(np.float32) / 255, 0, 1)
	            if mode == "11":
	                image = np.clip(image * 2 - 1, -1, 1)

	        elif image_mode == "01" and mode != "01":
	            if mode == "255":
	                image = (np.clip(image, 0, 1) * 255).round().astype(np.uint8)
	            elif mode == "11":
	                image = np.clip(image * 2 - 1, -1, 1)

	        elif image_mode == "11" and mode != "11":
	            image = np.clip(image / 2 + 0.5, 0, 1)
	            if mode == "255":
	                image = (image * 255).round().astype(np.uint8)

	    else:
	        if image_mode == "255" and mode != "255":
	            image = image.to(dtype=torch.float32).div(255).clamp(0, 1)
	            if mode == "11":
	                image = (image * 2 - 1).clamp(-1, 1)

	        elif image_mode == "01" and mode != "01":
	            if mode == "255":
	                image = image.clamp(0, 1)
	                image = (image * 255).round().to(dtype=torch.uint8)
	            elif mode == "11":
	                image = (image * 2 - 1).clamp(-1, 1)

	        elif image_mode == "11" and mode != "11":
	            image = (image / 2 + 0.5).clamp(0, 1)
	            if mode == "255":
	                image = image.mul(255).round().to(dtype=torch.uint8)

	    return image


	def resize_image(pil_image, image_size):
	    """
	    Resize a PIL image so that its shorter side equals `image_size`.

	    The image is first halved repeatedly with a box filter while its shorter
	    side is at least twice the target, then scaled to the exact size with
	    bicubic resampling. The aspect ratio is preserved.

	    Args:
	        pil_image: The PIL image to resize.
	        image_size: The target length of the shorter side; must be positive.

	    Returns:
	        The resized PIL image.

	    Raises:
	        ValueError: If `image_size` is not positive.
	    """
	    if image_size <= 0:
	        raise ValueError(f"`image_size` must be positive, got {image_size}")

	    while min(pil_image.size) >= 2 * image_size:
	        pil_image = pil_image.resize(tuple(x // 2 for x in pil_image.size), resample=PIL.Image.BOX)

	    scale = image_size / min(pil_image.size)
	    pil_image = pil_image.resize(tuple(round(x * scale) for x in pil_image.size), resample=PIL.Image.BICUBIC)
	    return pil_image


	def center_crop_arr(pil_image, image_size, crop=True):
	    """
	    Produce an `image_size` x `image_size` image by center-cropping or padding.

	    Center cropping implementation from ADM.
	    https://github.com/openai/guided-diffusion/blob/8fb3ad9197f16bbc40620447b2742e13458d2831/guided_diffusion/image_datasets.py#L126

	    Args:
	        pil_image: The PIL image to process.
	        image_size: The side length of the square output.
	        crop: When True, resize the shorter side to `image_size` and crop the
	            center square. When False, pad the image to a square with black
	            and then resize it.

	    Returns:
	        The processed PIL image.
	    """
	    if crop:
	        pil_image = resize_image(pil_image, image_size)
	        arr = np.array(pil_image)
	        crop_y = (arr.shape[0] - image_size) // 2
	        crop_x = (arr.shape[1] - image_size) // 2
	        return PIL.Image.fromarray(arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size])

	    # Pad the image to a square.
	    width, height = pil_image.size
	    if width != height:
	        # Create a square canvas whose side is the longer edge. The color name
	        # is resolved per mode, so single-band images work as well as RGB.
	        max_dim = max(width, height)
	        padded_img = PIL.Image.new(pil_image.mode, (max_dim, max_dim), "black")
	        # Paste the original image centered on the square canvas.
	        padded_img.paste(pil_image, ((max_dim - width) // 2, (max_dim - height) // 2))
	        pil_image = padded_img
	    pil_image = resize_image(pil_image, image_size)
	    return pil_image