Spaces:

zerogpu-aoti
/

ltx-dev-fast

Running on Zero

App Files Files Community

sayakpaul HF Staff committed on 12 days ago

Commit

e2da1c9

1 Parent(s): 4e531c0

up

Browse files

Files changed (1) hide show

optimization.py +45 -11

optimization.py CHANGED Viewed

@@ -11,7 +11,7 @@ import torch
 from torch.utils._pytree import tree_map_only
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
 from optimization_utils import capture_component_call
 from optimization_utils import aoti_compile
@@ -19,10 +19,12 @@ from optimization_utils import aoti_compile
 P = ParamSpec('P')
-TRANSFORMER_NUM_FRAMES_DIM = torch.export.Dim('num_frames', min=3, max=21)
 TRANSFORMER_DYNAMIC_SHAPES = {
-    'hidden_states': {2: TRANSFORMER_NUM_FRAMES_DIM},
 }
 INDUCTOR_CONFIGS = {
@@ -33,9 +35,18 @@ INDUCTOR_CONFIGS = {
     'max_autotune': True,
     'triton.cudagraphs': True,
 }
 def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
     @spaces.GPU(duration=1500)
     def compile_transformer():
@@ -49,13 +60,28 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
         quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
         hidden_states: torch.Tensor = call.kwargs['hidden_states']
-        hidden_states_transposed = hidden_states.transpose(-1, -2).contiguous()
-        if hidden_states.shape[-1] > hidden_states.shape[-2]:
-            hidden_states_landscape = hidden_states
-            hidden_states_portrait = hidden_states_transposed
         else:
-            hidden_states_landscape = hidden_states_transposed
-            hidden_states_portrait = hidden_states
         exported_landscape = torch.export.export(
             mod=pipeline.transformer,
@@ -81,7 +107,15 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
     def combined_transformer(*args, **kwargs):
         hidden_states: torch.Tensor = kwargs['hidden_states']
-        if hidden_states.shape[-1] > hidden_states.shape[-2]:
             return compiled_landscape(*args, **kwargs)
         else:
             return compiled_portrait(*args, **kwargs)

 from torch.utils._pytree import tree_map_only
 from torchao.quantization import quantize_
 from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
+from diffusers import LTXConditionPipeline
 from optimization_utils import capture_component_call
 from optimization_utils import aoti_compile
 P = ParamSpec('P')
+# Sequence packing in LTX is a bit of a pain.
+# See: https://github.com/huggingface/diffusers/blob/c052791b5fe29ce8a308bf63dda97aa205b729be/src/diffusers/pipelines/ltx/pipeline_ltx.py#L420
+TRANSFORMER_NUM_FRAMES_DIM = torch.export.Dim('seq_len', min=4680, max=6000)
 TRANSFORMER_DYNAMIC_SHAPES = {
+    'hidden_states': {1: TRANSFORMER_NUM_FRAMES_DIM},
 }
 INDUCTOR_CONFIGS = {
     'max_autotune': True,
     'triton.cudagraphs': True,
 }
+TRANSFORMER_SPATIAL_PATCH_SIZE = 1
+TRANSFORMER_TEMPORAL_PATCH_SIZE = 1
+VAE_SPATIAL_COMPRESSION_RATIO = 32
+VAE_TEMPORAL_COMPRESSION_RATIO = 8
 def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
+    num_frames = kwargs.get("num_frames")
+    height = kwargs.get("height")
+    width =  kwargs.get("width")
+    latent_num_frames = (num_frames - 1) // VAE_TEMPORAL_COMPRESSION_RATIO + 1
+    latent_height = height // VAE_SPATIAL_COMPRESSION_RATIO
+    latent_width = width //VAE_SPATIAL_COMPRESSION_RATIO
     @spaces.GPU(duration=1500)
     def compile_transformer():
         quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
         hidden_states: torch.Tensor = call.kwargs['hidden_states']
+        unpacked_hidden_states = LTXConditionPipeline._unpack_latents(
+            hidden_states,
+            latent_num_frames,
+            latent_height,
+            latent_width,
+            TRANSFORMER_SPATIAL_PATCH_SIZE,
+            TRANSFORMER_TEMPORAL_PATCH_SIZE,
+        )
+        unpacked_hidden_states_transposed = hidden_states.transpose(-1, -2).contiguous()
+        if unpacked_hidden_states.shape[-1] > hidden_states.shape[-2]:
+            hidden_states_landscape = unpacked_hidden_states
+            hidden_states_portrait = unpacked_hidden_states_transposed
         else:
+            hidden_states_landscape = unpacked_hidden_states_transposed
+            hidden_states_portrait = unpacked_hidden_states
+        hidden_states_landscape = LTXConditionPipeline._pack_latents(
+            hidden_states_landscape, TRANSFORMER_SPATIAL_PATCH_SIZE, TRANSFORMER_TEMPORAL_PATCH_SIZE
+        )
+        hidden_states_portrait = LTXConditionPipeline._pack_latents(
+            hidden_states_portrait, TRANSFORMER_SPATIAL_PATCH_SIZE, TRANSFORMER_TEMPORAL_PATCH_SIZE
+        )
         exported_landscape = torch.export.export(
             mod=pipeline.transformer,
     def combined_transformer(*args, **kwargs):
         hidden_states: torch.Tensor = kwargs['hidden_states']
+        unpacked_hidden_states = LTXConditionPipeline._unpack_latents(
+            hidden_states,
+            latent_num_frames,
+            latent_height,
+            latent_width,
+            TRANSFORMER_SPATIAL_PATCH_SIZE,
+            TRANSFORMER_TEMPORAL_PATCH_SIZE,
+        )
+        if unpacked_hidden_states.shape[-1] > unpacked_hidden_states.shape[-2]:
             return compiled_landscape(*args, **kwargs)
         else:
             return compiled_portrait(*args, **kwargs)