Spaces:

zerogpu-aoti
/

ltx-dev-fast

Running on Zero

App Files Community

sayakpaul HF Staff committed 12 days ago

Commit

7f65363

1 Parent(s): e2da1c9

up

Browse files

Files changed (3) hide show

app.py +46 -23
optimization.py +26 -24
optimization_utils.py +13 -10

app.py CHANGED Viewed

@@ -1,6 +1,9 @@
 # PyTorch 2.8 (temporary hack)
 import os
-os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
 # Actual demo code
 import spaces
@@ -25,14 +28,14 @@ FIXED_FPS = 24
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 96
-MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
-MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
 pipe = LTXConditionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to("cuda")
 optimize_pipeline_(
     pipe,
-    image=Image.new('RGB', (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)),
-    prompt='prompt',
     height=LANDSCAPE_HEIGHT,
     width=LANDSCAPE_WIDTH,
     num_frames=MAX_FRAMES_MODEL,
@@ -64,6 +67,7 @@ def resize_image_landscape(image: Image.Image) -> Image.Image:
         image = image.crop((0, top, width, top + new_height))
     return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
 def get_duration(
     input_image,
     prompt,
@@ -82,6 +86,7 @@ def get_duration(
     else:
         return 60
 @spaces.GPU(duration=get_duration)
 def generate_video(
     input_image,
@@ -96,15 +101,15 @@ def generate_video(
 ):
     """
     Generate a video from an input image using the LTX distilled model.
     This function takes an input image and generates a video animation based on the provided
     prompt and parameters. It uses the LTX 13B Distilled Image-to-Video model for fast generation
     in 4-8 steps.
     Args:
         input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
         prompt (str): Text prompt describing the desired animation or motion.
-        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
             Defaults to default_negative_prompt (contains unwanted visual artifacts).
         duration_seconds (float, optional): Duration of the generated video in seconds.
             Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
@@ -117,15 +122,15 @@ def generate_video(
         randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
             Defaults to False.
         progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
     Returns:
         tuple: A tuple containing:
             - video_path (str): Path to the generated video file (.mp4)
             - current_seed (int): The seed used for generation (useful when randomize_seed=True)
     Raises:
         gr.Error: If input_image is None (no image uploaded).
     Note:
         - The function automatically resizes the input image to the target dimensions
         - Frame count is calculated as duration_seconds * FIXED_FPS (24)
@@ -135,7 +140,7 @@ def generate_video(
     """
     if input_image is None:
         raise gr.Error("Please upload an input image.")
     num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
@@ -161,39 +166,57 @@ def generate_video(
     return video_path, current_seed
 with gr.Blocks() as demo:
     gr.Markdown("# Fast few-steps LTX 0.9.8 I2V (13B)")
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image (auto-resized to target H/W)")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=10, step=1, value=8, label="Inference Steps")
-                guidance_scale_input = gr.Slider(minimum=1.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)
             generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
     ui_inputs = [
-        input_image_component, prompt_input,
-        negative_prompt_input, duration_seconds_input,
-        guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
     ]
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
     gr.Examples(
-        examples=[
             ["peng.png", "a penguin playfully dancing in the snow, Antarctica"],
             ["forg.jpg", "the frog jumps around"],
         ],
-        inputs=[input_image_component, prompt_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
     )
 if __name__ == "__main__":
-    demo.queue().launch(mcp_server=True)

 # PyTorch 2.8 (temporary hack)
 import os
+os.system(
+    'pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces'
+)
 # Actual demo code
 import spaces
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 96
+MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
+MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
 pipe = LTXConditionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to("cuda")
 optimize_pipeline_(
     pipe,
+    image=Image.new("RGB", (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)),
+    prompt="prompt",
     height=LANDSCAPE_HEIGHT,
     width=LANDSCAPE_WIDTH,
     num_frames=MAX_FRAMES_MODEL,
         image = image.crop((0, top, width, top + new_height))
     return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
 def get_duration(
     input_image,
     prompt,
     else:
         return 60
 @spaces.GPU(duration=get_duration)
 def generate_video(
     input_image,
 ):
     """
     Generate a video from an input image using the LTX distilled model.
     This function takes an input image and generates a video animation based on the provided
     prompt and parameters. It uses the LTX 13B Distilled Image-to-Video model for fast generation
     in 4-8 steps.
     Args:
         input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
         prompt (str): Text prompt describing the desired animation or motion.
+        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
             Defaults to default_negative_prompt (contains unwanted visual artifacts).
         duration_seconds (float, optional): Duration of the generated video in seconds.
             Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
         randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
             Defaults to False.
         progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
     Returns:
         tuple: A tuple containing:
             - video_path (str): Path to the generated video file (.mp4)
             - current_seed (int): The seed used for generation (useful when randomize_seed=True)
     Raises:
         gr.Error: If input_image is None (no image uploaded).
     Note:
         - The function automatically resizes the input image to the target dimensions
         - Frame count is calculated as duration_seconds * FIXED_FPS (24)
     """
     if input_image is None:
         raise gr.Error("Please upload an input image.")
     num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
     return video_path, current_seed
 with gr.Blocks() as demo:
     gr.Markdown("# Fast few-steps LTX 0.9.8 I2V (13B)")
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image (auto-resized to target H/W)")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
+            duration_seconds_input = gr.Slider(
+                minimum=MIN_DURATION,
+                maximum=MAX_DURATION,
+                step=0.1,
+                value=MAX_DURATION,
+                label="Duration (seconds)",
+                info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.",
+            )
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
+                steps_slider = gr.Slider(minimum=1, maximum=10, step=1, value=8, label="Inference Steps")
+                guidance_scale_input = gr.Slider(
+                    minimum=1.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False
+                )
             generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
     ui_inputs = [
+        input_image_component,
+        prompt_input,
+        negative_prompt_input,
+        duration_seconds_input,
+        guidance_scale_input,
+        steps_slider,
+        seed_input,
+        randomize_seed_checkbox,
     ]
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
     gr.Examples(
+        examples=[
             ["peng.png", "a penguin playfully dancing in the snow, Antarctica"],
             ["forg.jpg", "the frog jumps around"],
         ],
+        inputs=[input_image_component, prompt_input],
+        outputs=[video_output, seed_input],
+        fn=generate_video,
+        cache_examples="lazy",
     )
 if __name__ == "__main__":
+    demo.queue().launch(mcp_server=True)

optimization.py CHANGED Viewed

@@ -16,50 +16,50 @@ from optimization_utils import capture_component_call
 from optimization_utils import aoti_compile
-P = ParamSpec('P')
 # Sequence packing in LTX is a bit of a pain.
 # See: https://github.com/huggingface/diffusers/blob/c052791b5fe29ce8a308bf63dda97aa205b729be/src/diffusers/pipelines/ltx/pipeline_ltx.py#L420
-TRANSFORMER_NUM_FRAMES_DIM = torch.export.Dim('seq_len', min=4680, max=6000)
 TRANSFORMER_DYNAMIC_SHAPES = {
-    'hidden_states': {1: TRANSFORMER_NUM_FRAMES_DIM},
 }
 INDUCTOR_CONFIGS = {
-    'conv_1x1_as_mm': True,
-    'epilogue_fusion': False,
-    'coordinate_descent_tuning': True,
-    'coordinate_descent_check_all_directions': True,
-    'max_autotune': True,
-    'triton.cudagraphs': True,
 }
 TRANSFORMER_SPATIAL_PATCH_SIZE = 1
 TRANSFORMER_TEMPORAL_PATCH_SIZE = 1
 VAE_SPATIAL_COMPRESSION_RATIO = 32
 VAE_TEMPORAL_COMPRESSION_RATIO = 8
 def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
     num_frames = kwargs.get("num_frames")
     height = kwargs.get("height")
-    width =  kwargs.get("width")
     latent_num_frames = (num_frames - 1) // VAE_TEMPORAL_COMPRESSION_RATIO + 1
     latent_height = height // VAE_SPATIAL_COMPRESSION_RATIO
-    latent_width = width //VAE_SPATIAL_COMPRESSION_RATIO
     @spaces.GPU(duration=1500)
     def compile_transformer():
-        with capture_component_call(pipeline, 'transformer') as call:
             pipeline(*args, **kwargs)
         dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
         dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
         quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
-        hidden_states: torch.Tensor = call.kwargs['hidden_states']
         unpacked_hidden_states = LTXConditionPipeline._unpack_latents(
             hidden_states,
             latent_num_frames,
@@ -68,7 +68,7 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
             TRANSFORMER_SPATIAL_PATCH_SIZE,
             TRANSFORMER_TEMPORAL_PATCH_SIZE,
         )
-        unpacked_hidden_states_transposed = hidden_states.transpose(-1, -2).contiguous()
         if unpacked_hidden_states.shape[-1] > hidden_states.shape[-2]:
             hidden_states_landscape = unpacked_hidden_states
             hidden_states_portrait = unpacked_hidden_states_transposed
@@ -86,27 +86,29 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
         exported_landscape = torch.export.export(
             mod=pipeline.transformer,
             args=call.args,
-            kwargs=call.kwargs | {'hidden_states': hidden_states_landscape},
             dynamic_shapes=dynamic_shapes,
         )
         exported_portrait = torch.export.export(
             mod=pipeline.transformer,
             args=call.args,
-            kwargs=call.kwargs | {'hidden_states': hidden_states_portrait},
             dynamic_shapes=dynamic_shapes,
         )
         compiled_landscape = aoti_compile(exported_landscape, INDUCTOR_CONFIGS)
         compiled_portrait = aoti_compile(exported_portrait, INDUCTOR_CONFIGS)
-        compiled_portrait.weights = compiled_landscape.weights # Avoid weights duplication when serializing back to main process
         return compiled_landscape, compiled_portrait
     compiled_landscape, compiled_portrait = compile_transformer()
     def combined_transformer(*args, **kwargs):
-        hidden_states: torch.Tensor = kwargs['hidden_states']
         unpacked_hidden_states = LTXConditionPipeline._unpack_latents(
             hidden_states,
             latent_num_frames,
@@ -123,5 +125,5 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
     transformer_config = pipeline.transformer.config
     transformer_dtype = pipeline.transformer.dtype
     pipeline.transformer = combined_transformer
-    pipeline.transformer.config = transformer_config # pyright: ignore[reportAttributeAccessIssue]
-    pipeline.transformer.dtype = transformer_dtype # pyright: ignore[reportAttributeAccessIssue]

 from optimization_utils import aoti_compile
+P = ParamSpec("P")
 # Sequence packing in LTX is a bit of a pain.
 # See: https://github.com/huggingface/diffusers/blob/c052791b5fe29ce8a308bf63dda97aa205b729be/src/diffusers/pipelines/ltx/pipeline_ltx.py#L420
+TRANSFORMER_NUM_FRAMES_DIM = torch.export.Dim("seq_len", min=4680, max=6000)
 TRANSFORMER_DYNAMIC_SHAPES = {
+    "hidden_states": {1: TRANSFORMER_NUM_FRAMES_DIM},
 }
 INDUCTOR_CONFIGS = {
+    "conv_1x1_as_mm": True,
+    "epilogue_fusion": False,
+    "coordinate_descent_tuning": True,
+    "coordinate_descent_check_all_directions": True,
+    "max_autotune": True,
+    "triton.cudagraphs": True,
 }
 TRANSFORMER_SPATIAL_PATCH_SIZE = 1
 TRANSFORMER_TEMPORAL_PATCH_SIZE = 1
 VAE_SPATIAL_COMPRESSION_RATIO = 32
 VAE_TEMPORAL_COMPRESSION_RATIO = 8
 def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
     num_frames = kwargs.get("num_frames")
     height = kwargs.get("height")
+    width = kwargs.get("width")
     latent_num_frames = (num_frames - 1) // VAE_TEMPORAL_COMPRESSION_RATIO + 1
     latent_height = height // VAE_SPATIAL_COMPRESSION_RATIO
+    latent_width = width // VAE_SPATIAL_COMPRESSION_RATIO
     @spaces.GPU(duration=1500)
     def compile_transformer():
+        with capture_component_call(pipeline, "transformer") as call:
             pipeline(*args, **kwargs)
         dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
         dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
         quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
+        hidden_states: torch.Tensor = call.kwargs["hidden_states"]
         unpacked_hidden_states = LTXConditionPipeline._unpack_latents(
             hidden_states,
             latent_num_frames,
             TRANSFORMER_SPATIAL_PATCH_SIZE,
             TRANSFORMER_TEMPORAL_PATCH_SIZE,
         )
+        unpacked_hidden_states_transposed = unpacked_hidden_states.transpose(-1, -2).contiguous()
         if unpacked_hidden_states.shape[-1] > hidden_states.shape[-2]:
             hidden_states_landscape = unpacked_hidden_states
             hidden_states_portrait = unpacked_hidden_states_transposed
         exported_landscape = torch.export.export(
             mod=pipeline.transformer,
             args=call.args,
+            kwargs=call.kwargs | {"hidden_states": hidden_states_landscape},
             dynamic_shapes=dynamic_shapes,
         )
         exported_portrait = torch.export.export(
             mod=pipeline.transformer,
             args=call.args,
+            kwargs=call.kwargs | {"hidden_states": hidden_states_portrait},
             dynamic_shapes=dynamic_shapes,
         )
         compiled_landscape = aoti_compile(exported_landscape, INDUCTOR_CONFIGS)
         compiled_portrait = aoti_compile(exported_portrait, INDUCTOR_CONFIGS)
+        compiled_portrait.weights = (
+            compiled_landscape.weights
+        )  # Avoid weights duplication when serializing back to main process
         return compiled_landscape, compiled_portrait
     compiled_landscape, compiled_portrait = compile_transformer()
     def combined_transformer(*args, **kwargs):
+        hidden_states: torch.Tensor = kwargs["hidden_states"]
         unpacked_hidden_states = LTXConditionPipeline._unpack_latents(
             hidden_states,
             latent_num_frames,
     transformer_config = pipeline.transformer.config
     transformer_dtype = pipeline.transformer.dtype
     pipeline.transformer = combined_transformer
+    pipeline.transformer.config = transformer_config  # pyright: ignore[reportAttributeAccessIssue]
+    pipeline.transformer.dtype = transformer_dtype  # pyright: ignore[reportAttributeAccessIssue]

optimization_utils.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """
 Taken from https://huggingface.co/spaces/cbensimon/wan2-1-fast/
 """
 import contextlib
 from contextvars import ContextVar
 from io import BytesIO
@@ -15,22 +16,23 @@ from torch.export.pt2_archive._package_weights import Weights
 INDUCTOR_CONFIGS_OVERRIDES = {
-    'aot_inductor.package_constants_in_so': False,
-    'aot_inductor.package_constants_on_disk': True,
-    'aot_inductor.package': True,
 }
 class ZeroGPUWeights:
     def __init__(self, constants_map: dict[str, torch.Tensor], to_cuda: bool = False):
         if to_cuda:
-            self.constants_map = {name: tensor.to('cuda') for name, tensor in constants_map.items()}
         else:
             self.constants_map = constants_map
     def __reduce__(self):
         constants_map: dict[str, torch.Tensor] = {}
         for name, tensor in self.constants_map.items():
-            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
             constants_map[name] = tensor_.copy_(tensor).detach().share_memory_()
         return ZeroGPUWeights, (constants_map, True)
@@ -39,13 +41,15 @@ class ZeroGPUCompiledModel:
     def __init__(self, archive_file: torch.types.FileLike, weights: ZeroGPUWeights):
         self.archive_file = archive_file
         self.weights = weights
-        self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar('compiled_model', default=None)
     def __call__(self, *args, **kwargs):
         if (compiled_model := self.compiled_model.get()) is None:
             compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
             compiled_model.load_constants(self.weights.constants_map, check_full_update=True, user_managed=True)
             self.compiled_model.set(compiled_model)
         return compiled_model(*args, **kwargs)
     def __reduce__(self):
         return ZeroGPUCompiledModel, (self.archive_file, self.weights)
@@ -62,7 +66,7 @@ def aoti_compile(
     archive_file = BytesIO()
     files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
-    weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
     zerogpu_weights = ZeroGPUWeights({name: weights.get_weight(name)[0] for name in weights})
     return ZeroGPUCompiledModel(archive_file, zerogpu_weights)
@@ -71,9 +75,8 @@ def aoti_compile(
 def capture_component_call(
     pipeline: Any,
     component_name: str,
-    component_method='forward',
 ):
     class CapturedCallException(Exception):
         def __init__(self, *args, **kwargs):
             super().__init__()
@@ -96,4 +99,4 @@ def capture_component_call(
             yield captured_call
         except CapturedCallException as e:
             captured_call.args = e.args
-            captured_call.kwargs = e.kwargs

 """
 Taken from https://huggingface.co/spaces/cbensimon/wan2-1-fast/
 """
 import contextlib
 from contextvars import ContextVar
 from io import BytesIO
 INDUCTOR_CONFIGS_OVERRIDES = {
+    "aot_inductor.package_constants_in_so": False,
+    "aot_inductor.package_constants_on_disk": True,
+    "aot_inductor.package": True,
 }
 class ZeroGPUWeights:
     def __init__(self, constants_map: dict[str, torch.Tensor], to_cuda: bool = False):
         if to_cuda:
+            self.constants_map = {name: tensor.to("cuda") for name, tensor in constants_map.items()}
         else:
             self.constants_map = constants_map
     def __reduce__(self):
         constants_map: dict[str, torch.Tensor] = {}
         for name, tensor in self.constants_map.items():
+            tensor_ = torch.empty_like(tensor, device="cpu").pin_memory()
             constants_map[name] = tensor_.copy_(tensor).detach().share_memory_()
         return ZeroGPUWeights, (constants_map, True)
     def __init__(self, archive_file: torch.types.FileLike, weights: ZeroGPUWeights):
         self.archive_file = archive_file
         self.weights = weights
+        self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar("compiled_model", default=None)
     def __call__(self, *args, **kwargs):
         """Run the compiled model, loading it on first use in the current context.

         The loaded model is cached in the ``self.compiled_model`` context
         variable. When that variable is unset (``None``), the package is loaded
         from ``self.archive_file``, its constants are populated from
         ``self.weights.constants_map``, and the result is stored back into the
         context variable before the call is forwarded.

         Args:
             *args: Positional arguments forwarded unchanged to the compiled model.
             **kwargs: Keyword arguments forwarded unchanged to the compiled model.

         Returns:
             Whatever the loaded compiled model returns for the given arguments.
         """
         # Load only when this context has no cached model yet.
         if (compiled_model := self.compiled_model.get()) is None:
             compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
             # NOTE(review): user_managed=True presumably makes the loaded model use the
             # supplied tensors directly instead of copying them; confirm against the PyTorch docs.
             compiled_model.load_constants(self.weights.constants_map, check_full_update=True, user_managed=True)
             self.compiled_model.set(compiled_model)
         return compiled_model(*args, **kwargs)
     def __reduce__(self):
         """Pickle support: rebuild the instance from its archive file and weights.

         Only ``archive_file`` and ``weights`` are included in the reduced state;
         the loaded model held in ``self.compiled_model`` is not.

         Returns:
             tuple: ``(ZeroGPUCompiledModel, (archive_file, weights))``, i.e. the
             callable and the arguments used to reconstruct the object.
         """
         return ZeroGPUCompiledModel, (self.archive_file, self.weights)
     archive_file = BytesIO()
     files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
+    (weights,) = (artifact for artifact in artifacts if isinstance(artifact, Weights))
     zerogpu_weights = ZeroGPUWeights({name: weights.get_weight(name)[0] for name in weights})
     return ZeroGPUCompiledModel(archive_file, zerogpu_weights)
 def capture_component_call(
     pipeline: Any,
     component_name: str,
+    component_method="forward",
 ):
     class CapturedCallException(Exception):
         def __init__(self, *args, **kwargs):
             super().__init__()
             yield captured_call
         except CapturedCallException as e:
             captured_call.args = e.args
+            captured_call.kwargs = e.kwargs