Spaces: Running on Zero
switch to distilled
Browse files
app.py
CHANGED
@@ -15,10 +15,13 @@ import mediapipe as mp
|
|
15 |
from PIL import Image
|
16 |
import cv2
|
17 |
|
|
|
18 |
dtype = torch.bfloat16
|
19 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
20 |
|
21 |
-
pipeline = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-dev", torch_dtype=dtype)
|
|
|
|
|
22 |
pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipeline.vae, torch_dtype=dtype)
|
23 |
pipeline.to(device)
|
24 |
pipe_upsample.to(device)
|
@@ -197,12 +200,8 @@ def generate_video(
|
|
197 |
negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
|
198 |
height=768,
|
199 |
width=1152,
|
200 |
-
num_inference_steps=
|
201 |
-
guidance_scale=
|
202 |
-
guidance_rescale=0.7,
|
203 |
-
decode_timestep=0.05,
|
204 |
-
decode_noise_scale=0.025,
|
205 |
-
image_cond_noise_scale=0.0,
|
206 |
seed=0,
|
207 |
randomize_seed=False,
|
208 |
progress=gr.Progress()
|
@@ -263,11 +262,11 @@ def generate_video(
|
|
263 |
height=downscaled_height,
|
264 |
num_frames=num_frames,
|
265 |
num_inference_steps=num_inference_steps,
|
266 |
-
decode_timestep=decode_timestep,
|
267 |
-
decode_noise_scale=decode_noise_scale,
|
268 |
-
image_cond_noise_scale=image_cond_noise_scale,
|
269 |
guidance_scale=guidance_scale,
|
270 |
-
guidance_rescale=guidance_rescale,
|
271 |
generator=torch.Generator().manual_seed(seed),
|
272 |
output_type="latent",
|
273 |
).frames
|
@@ -293,11 +292,11 @@ def generate_video(
|
|
293 |
denoise_strength=0.4,
|
294 |
num_inference_steps=10,
|
295 |
latents=upscaled_latents,
|
296 |
-
decode_timestep=decode_timestep,
|
297 |
-
decode_noise_scale=decode_noise_scale,
|
298 |
-
image_cond_noise_scale=image_cond_noise_scale,
|
299 |
guidance_scale=guidance_scale,
|
300 |
-
|
|
|
|
|
301 |
generator=torch.Generator(device="cuda").manual_seed(seed),
|
302 |
output_type="pil",
|
303 |
).frames[0]
|
@@ -392,7 +391,7 @@ with gr.Blocks() as demo:
|
|
392 |
minimum=10,
|
393 |
maximum=50,
|
394 |
step=1,
|
395 |
-
value=
|
396 |
)
|
397 |
|
398 |
with gr.Row():
|
@@ -401,43 +400,10 @@ with gr.Blocks() as demo:
|
|
401 |
minimum=1.0,
|
402 |
maximum=15.0,
|
403 |
step=0.1,
|
404 |
-
value=
|
405 |
-
)
|
406 |
-
guidance_rescale = gr.Slider(
|
407 |
-
label="Guidance Rescale",
|
408 |
-
minimum=0.0,
|
409 |
-
maximum=1.0,
|
410 |
-
step=0.05,
|
411 |
-
value=0.7,
|
412 |
-
visible=False
|
413 |
-
)
|
414 |
-
|
415 |
-
with gr.Row():
|
416 |
-
decode_timestep = gr.Slider(
|
417 |
-
label="Decode Timestep",
|
418 |
-
minimum=0.0,
|
419 |
-
maximum=1.0,
|
420 |
-
step=0.01,
|
421 |
-
value=0.05,
|
422 |
-
visible=False
|
423 |
-
)
|
424 |
-
decode_noise_scale = gr.Slider(
|
425 |
-
label="Decode Noise Scale",
|
426 |
-
minimum=0.0,
|
427 |
-
maximum=0.1,
|
428 |
-
step=0.005,
|
429 |
-
value=0.025,
|
430 |
-
visible=False
|
431 |
)
|
|
|
432 |
|
433 |
-
image_cond_noise_scale = gr.Slider(
|
434 |
-
label="Image Condition Noise Scale",
|
435 |
-
minimum=0.0,
|
436 |
-
maximum=0.5,
|
437 |
-
step=0.01,
|
438 |
-
value=0.0,
|
439 |
-
visible=False
|
440 |
-
)
|
441 |
|
442 |
with gr.Row():
|
443 |
randomize_seed = gr.Checkbox(
|
@@ -482,10 +448,6 @@ with gr.Blocks() as demo:
|
|
482 |
width,
|
483 |
num_inference_steps,
|
484 |
guidance_scale,
|
485 |
-
guidance_rescale,
|
486 |
-
decode_timestep,
|
487 |
-
decode_noise_scale,
|
488 |
-
image_cond_noise_scale,
|
489 |
seed,
|
490 |
randomize_seed
|
491 |
],
|
|
|
15 |
from PIL import Image
|
16 |
import cv2
|
17 |
|
18 |
+
|
19 |
dtype = torch.bfloat16
|
20 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
21 |
|
22 |
+
#pipeline = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-dev", torch_dtype=dtype)
|
23 |
+
pipeline = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.bfloat16)
|
24 |
+
|
25 |
pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipeline.vae, torch_dtype=dtype)
|
26 |
pipeline.to(device)
|
27 |
pipe_upsample.to(device)
|
|
|
200 |
negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
|
201 |
height=768,
|
202 |
width=1152,
|
203 |
+
num_inference_steps=7,
|
204 |
+
guidance_scale=1.0,
|
|
|
|
|
|
|
|
|
205 |
seed=0,
|
206 |
randomize_seed=False,
|
207 |
progress=gr.Progress()
|
|
|
262 |
height=downscaled_height,
|
263 |
num_frames=num_frames,
|
264 |
num_inference_steps=num_inference_steps,
|
265 |
+
decode_timestep=0.05,
|
266 |
+
decode_noise_scale=0.025,
|
267 |
+
# image_cond_noise_scale=image_cond_noise_scale,
|
268 |
guidance_scale=guidance_scale,
|
269 |
+
# guidance_rescale=guidance_rescale,
|
270 |
generator=torch.Generator().manual_seed(seed),
|
271 |
output_type="latent",
|
272 |
).frames
|
|
|
292 |
denoise_strength=0.4,
|
293 |
num_inference_steps=10,
|
294 |
latents=upscaled_latents,
|
295 |
+
decode_timestep = 0.05,
|
|
|
|
|
296 |
guidance_scale=guidance_scale,
|
297 |
+
decode_noise_scale = 0.025,
|
298 |
+
image_cond_noise_scale=0.025,
|
299 |
+
#guidance_rescale=guidance_rescale,
|
300 |
generator=torch.Generator(device="cuda").manual_seed(seed),
|
301 |
output_type="pil",
|
302 |
).frames[0]
|
|
|
391 |
minimum=10,
|
392 |
maximum=50,
|
393 |
step=1,
|
394 |
+
value=7
|
395 |
)
|
396 |
|
397 |
with gr.Row():
|
|
|
400 |
minimum=1.0,
|
401 |
maximum=15.0,
|
402 |
step=0.1,
|
403 |
+
value=1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
)
|
405 |
+
|
406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
407 |
|
408 |
with gr.Row():
|
409 |
randomize_seed = gr.Checkbox(
|
|
|
448 |
width,
|
449 |
num_inference_steps,
|
450 |
guidance_scale,
|
|
|
|
|
|
|
|
|
451 |
seed,
|
452 |
randomize_seed
|
453 |
],
|