ltx-video-distilled

Running on Zero

App Files Files Community

ltx-video-distilled/ltx_video/models/transformers/symmetric_patchifier.py

linoyts HF Staff

Upload 35 files

833590f verified 17 days ago

raw

history blame contribute delete

2.77 kB

	from abc import ABC, abstractmethod
	from typing import Tuple

	import torch
	from diffusers.configuration_utils import ConfigMixin
	from einops import rearrange
	from torch import Tensor


	class Patchifier(ConfigMixin, ABC):
	    """Abstract base for splitting latents into patches and reassembling them.

	    The patch size is stored as the 3-tuple ``(1, patch_size, patch_size)``.
	    ``get_latent_coords`` pairs its entries with the frame, height and width
	    axes, in that order.
	    """

	    def __init__(self, patch_size: int):
	        """Store the patch size.

	        Args:
	            patch_size: Patch edge length used for both the height and the
	                width entry of the stored tuple; the first entry is fixed
	                to 1.
	        """
	        super().__init__()
	        self._patch_size = (1, patch_size, patch_size)

	    @abstractmethod
	    def patchify(self, latents: Tensor) -> Tuple[Tensor, Tensor]:
	        """Split ``latents`` into patches; subclasses must override.

	        Returns:
	            A pair of tensors. What each one holds is defined by the
	            concrete subclass.
	        """
	        raise NotImplementedError("Patchify method not implemented")

	    @abstractmethod
	    def unpatchify(
	        self,
	        latents: Tensor,
	        output_height: int,
	        output_width: int,
	        out_channels: int,
	    ) -> Tensor:
	        """Reassemble patches into a latent tensor; subclasses must override.

	        The return type is a single tensor, which is what the concrete
	        implementation in this file produces.
	        """
	        pass

	    @property
	    def patch_size(self):
	        """The stored patch-size tuple ``(1, patch_size, patch_size)``."""
	        return self._patch_size

	    def get_latent_coords(
	        self,
	        latent_num_frames: int,
	        latent_height: int,
	        latent_width: int,
	        batch_size: int,
	        device,
	    ) -> Tensor:
	        """
	        Return a tensor of shape [batch_size, 3, num_patches] containing the
	        top-left corner latent coordinates of each latent patch.
	        The tensor is repeated for each batch element.

	        Args:
	            latent_num_frames: Extent of the frame axis; stepped by
	                ``patch_size[0]``.
	            latent_height: Extent of the height axis; stepped by
	                ``patch_size[1]``.
	            latent_width: Extent of the width axis; stepped by
	                ``patch_size[2]``.
	            batch_size: Number of times the coordinate grid is repeated
	                along the leading dimension.
	            device: Forwarded to ``torch.arange`` for every axis.

	        Returns:
	            Coordinates with the three channels ordered (frame, height,
	            width) and patches flattened frame-major, then height, then
	            width.
	        """
	        # indexing="ij" is spelled out: it is the layout the flattening below
	        # relies on, and omitting it triggers a deprecation warning in
	        # current PyTorch releases.
	        latent_sample_coords = torch.meshgrid(
	            torch.arange(0, latent_num_frames, self._patch_size[0], device=device),
	            torch.arange(0, latent_height, self._patch_size[1], device=device),
	            torch.arange(0, latent_width, self._patch_size[2], device=device),
	            indexing="ij",
	        )
	        # Stack the three per-axis grids into a leading channel dimension.
	        latent_sample_coords = torch.stack(latent_sample_coords, dim=0)
	        latent_coords = latent_sample_coords.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
	        latent_coords = rearrange(
	            latent_coords, "b c f h w -> b c (f h w)", b=batch_size
	        )
	        return latent_coords


	class SymmetricPatchifier(Patchifier):
	    """Patchifier that folds every patch into the channel axis of one token."""

	    def patchify(self, latents: Tensor) -> Tuple[Tensor, Tensor]:
	        """Turn a 5-D latent tensor into a sequence of patch tokens.

	        Args:
	            latents: Tensor unpacked as ``(b, c, f, h, w)``. Each of the
	                last three axes must be divisible by the matching patch
	                size entry, otherwise ``rearrange`` raises.

	        Returns:
	            ``(tokens, latent_coords)``: ``tokens`` is laid out as
	            ``b (f h w) (c p1 p2 p3)``, and ``latent_coords`` is the result
	            of ``get_latent_coords`` for the input's frame/height/width
	            extents, batch size and device.
	        """
	        b, _, f, h, w = latents.shape
	        latent_coords = self.get_latent_coords(f, h, w, b, latents.device)
	        latents = rearrange(
	            latents,
	            "b c (f p1) (h p2) (w p3) -> b (f h w) (c p1 p2 p3)",
	            p1=self._patch_size[0],
	            p2=self._patch_size[1],
	            p3=self._patch_size[2],
	        )
	        return latents, latent_coords

	    def unpatchify(
	        self,
	        latents: Tensor,
	        output_height: int,
	        output_width: int,
	        out_channels: int,
	    ) -> Tensor:
	        """Rebuild a 5-D latent tensor from patch tokens.

	        Args:
	            latents: Tokens laid out as ``b (f h w) (c p q)``.
	            output_height: Height of the reassembled tensor; floor-divided
	                by the patch height to get the number of patch rows.
	            output_width: Width of the reassembled tensor; floor-divided by
	                the patch width to get the number of patch columns.
	            out_channels: Not used by this implementation; kept so the
	                signature matches the base class.

	        Returns:
	            A single tensor laid out as ``b c f (h p) (w q)``. The frame
	            count and channel count are inferred by ``rearrange`` from the
	            token dimensions.
	        """
	        patch_rows = output_height // self._patch_size[1]
	        patch_cols = output_width // self._patch_size[2]
	        # The pattern has no temporal patch factor, so it only mirrors
	        # patchify while the first patch-size entry is 1 (the value
	        # Patchifier.__init__ sets).
	        return rearrange(
	            latents,
	            "b (f h w) (c p q) -> b c f (h p) (w q)",
	            h=patch_rows,
	            w=patch_cols,
	            p=self._patch_size[1],
	            q=self._patch_size[2],
	        )