Wan2GP / hyvideo /utils /data_utils.py
zxymimi23451's picture
Upload 258 files
78360e7 verified
import numpy as np
import math
from PIL import Image
import torch
import copy
import string
import random
def align_to(value, alignment):
    """Round a height or width up to the nearest multiple of ``alignment``.

    Args:
        value (int): height or width
        alignment (int): target alignment factor

    Returns:
        int: the smallest multiple of ``alignment`` that is >= ``value``
    """
    # Number of whole alignment units needed to cover ``value``.
    unit_count = math.ceil(value / alignment)
    return int(unit_count * alignment)
def black_image(width, height):
    """Create a solid black RGB image of the requested size.

    Args:
        width (int): image width
        height (int): image height

    Returns:
        the object returned by ``Image.new`` for mode "RGB", size
        ``(width, height)`` and fill colour ``(0, 0, 0)``
    """
    size = (width, height)
    fill_color = (0, 0, 0)
    return Image.new("RGB", size, fill_color)
def get_closest_ratio(height: float, width: float, ratios: list, buckets: list):
    """Pick the bucket whose aspect ratio is nearest to ``height / width``.

    Args:
        height (float): video height
        width (float): video width
        ratios (list): candidate aspect ratios, index-aligned with ``buckets``;
            a plain list/tuple or a 1-D NumPy array of numbers is accepted
        buckets (list): buckets generated by `generate_crop_size_list`

    Returns:
        tuple: ``(bucket, ratio)`` where ``bucket`` is the entry of ``buckets``
        at the position of the closest ratio and ``ratio`` is that ratio as a
        Python float. On ties the first candidate wins.

    Raises:
        ValueError: if ``ratios`` is empty
        ZeroDivisionError: if ``width`` is zero
    """
    aspect_ratio = float(height) / float(width)

    # Coerce once so that plain Python sequences work too; subtracting a float
    # from a list would otherwise raise TypeError.
    ratio_values = np.asarray(ratios, dtype=np.float64)
    if ratio_values.size == 0:
        raise ValueError("ratios must contain at least one aspect ratio")

    # A single search yields both the bucket index and the matching ratio,
    # so the two returned values can never disagree.
    closest_idx = int(np.abs(ratio_values - aspect_ratio).argmin())
    return buckets[closest_idx], float(ratio_values[closest_idx])
def generate_crop_size_list(base_size=256, patch_size=32, max_ratio=4.0):
    """Enumerate crop sizes whose patch count fits the ``base_size`` budget.

    Args:
        base_size (int, optional): the base size used to derive the patch
            budget. Defaults to 256.
        patch_size (int, optional): the stride between generated sizes.
            Defaults to 32.
        max_ratio (float, optional): the largest allowed ratio between the
            longer and the shorter side. Must be >= 1.0. Defaults to 4.0.

    Returns:
        list: tuples of ``(first_count * patch_size, second_count * patch_size)``
        in the order they are visited, starting from the widest first
        component and ending with the narrowest.
    """
    # Budget of patches: as many as a base_size x base_size square holds.
    patch_budget = round((base_size / patch_size) ** 2)
    assert max_ratio >= 1.0

    sizes = []
    first, second = patch_budget, 1
    while first > 0:
        if first >= second:
            longer, shorter = first, second
        else:
            longer, shorter = second, first
        if longer / shorter <= max_ratio:
            sizes.append((first * patch_size, second * patch_size))

        # Grow the second side while the product stays within the budget;
        # once it would overflow, shrink the first side instead.
        if first * (second + 1) > patch_budget:
            first -= 1
        else:
            second += 1
    return sizes
def align_floor_to(value, alignment):
    """Round a height or width down to the nearest multiple of ``alignment``.

    Args:
        value (int): height or width
        alignment (int): target alignment factor

    Returns:
        int: the largest multiple of ``alignment`` that is <= ``value``
    """
    # Number of whole alignment units that fit inside ``value``.
    unit_count = math.floor(value / alignment)
    return int(unit_count * alignment)