Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -94,21 +94,21 @@ pipe = create_ltx_video_pipeline(
|
|
94 |
sampler=PIPELINE_CONFIG_YAML["sampler"], # "from_checkpoint" or specific sampler
|
95 |
device=DEVICE,
|
96 |
enhance_prompt=False, # Assuming Gradio controls this, or set based on YAML later
|
97 |
-
)
|
98 |
|
99 |
# Create Latent Upsampler
|
100 |
latent_upsampler = create_latent_upsampler(
|
101 |
latent_upsampler_model_path=spatial_upsampler_path,
|
102 |
device=DEVICE
|
103 |
)
|
104 |
-
latent_upsampler = latent_upsampler.to(torch.bfloat16
|
105 |
|
106 |
|
107 |
# Multi-scale pipeline (wrapper)
|
108 |
multi_scale_pipe = LTXMultiScalePipeline(
|
109 |
video_pipeline=pipe,
|
110 |
latent_upsampler=latent_upsampler
|
111 |
-
)
|
112 |
# --- End Global Configuration & Model Loading ---
|
113 |
|
114 |
|
@@ -287,7 +287,7 @@ def generate(prompt,
|
|
287 |
"output_type": "latent"
|
288 |
}
|
289 |
latents = pipe(**first_pass_args).images # .images here is actually latents
|
290 |
-
|
291 |
# 2. Upsample latents manually
|
292 |
# Need to handle normalization around latent upsampler if it expects unnormalized latents
|
293 |
latents_unnorm = un_normalize_latents(latents, pipe.vae, vae_per_channel_normalize=True)
|
@@ -324,8 +324,9 @@ def generate(prompt,
|
|
324 |
decode_noise_val = PIPELINE_CONFIG_YAML.get("decode_noise_scale", 0.025)
|
325 |
upsampled_latents = upsampled_latents * (1 - decode_noise_val) + noise * decode_noise_val
|
326 |
|
327 |
-
|
328 |
result_frames_tensor = pipe.vae.decode(upsampled_latents, **decode_kwargs).sample
|
|
|
329 |
# result_frames_tensor shape: (B, C, F_video, H_video, W_video)
|
330 |
|
331 |
# --- Post-processing: Cropping and Converting to PIL ---
|
|
|
94 |
sampler=PIPELINE_CONFIG_YAML["sampler"], # "from_checkpoint" or specific sampler
|
95 |
device=DEVICE,
|
96 |
enhance_prompt=False, # Assuming Gradio controls this, or set based on YAML later
|
97 |
+
).to(torch.bfloat16)
|
98 |
|
99 |
# Create Latent Upsampler
|
100 |
latent_upsampler = create_latent_upsampler(
|
101 |
latent_upsampler_model_path=spatial_upsampler_path,
|
102 |
device=DEVICE
|
103 |
)
|
104 |
+
latent_upsampler = latent_upsampler.to(torch.bfloat16)
|
105 |
|
106 |
|
107 |
# Multi-scale pipeline (wrapper)
|
108 |
multi_scale_pipe = LTXMultiScalePipeline(
|
109 |
video_pipeline=pipe,
|
110 |
latent_upsampler=latent_upsampler
|
111 |
+
).to(torch.bfloat16)
|
112 |
# --- End Global Configuration & Model Loading ---
|
113 |
|
114 |
|
|
|
287 |
"output_type": "latent"
|
288 |
}
|
289 |
latents = pipe(**first_pass_args).images # .images here is actually latents
|
290 |
+
print("First pass done!")
|
291 |
# 2. Upsample latents manually
|
292 |
# Need to handle normalization around latent upsampler if it expects unnormalized latents
|
293 |
latents_unnorm = un_normalize_latents(latents, pipe.vae, vae_per_channel_normalize=True)
|
|
|
324 |
decode_noise_val = PIPELINE_CONFIG_YAML.get("decode_noise_scale", 0.025)
|
325 |
upsampled_latents = upsampled_latents * (1 - decode_noise_val) + noise * decode_noise_val
|
326 |
|
327 |
+
print("before vae decoding")
|
328 |
result_frames_tensor = pipe.vae.decode(upsampled_latents, **decode_kwargs).sample
|
329 |
+
print("after vae decoding?")
|
330 |
# result_frames_tensor shape: (B, C, F_video, H_video, W_video)
|
331 |
|
332 |
# --- Post-processing: Cropping and Converting to PIL ---
|