# Hugging Face Space file (page metadata preserved from scrape):
#   Space status: Running on Zero
#   File size: 3,391 bytes
#   Revisions: 10bbb52 / 34dc3bc
from diffusers import LTXConditionPipeline
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
import numpy as np
from PIL import Image
import torch
from diffusers.utils import load_image, load_video, export_to_video
from optimization import optimize_pipeline_
# Model and sampling configuration for the distilled LTX-Video pipeline.
MODEL_ID = "Lightricks/LTX-Video-0.9.8-13B-distilled"
# NOTE(review): 480x832 is a *portrait* aspect despite the "LANDSCAPE" names;
# resize_image_landscape() resizes to exactly (480, 832). Confirm the intended
# orientation — the names suggest 832x480 may have been meant.
LANDSCAPE_WIDTH = 480
LANDSCAPE_HEIGHT = 832
# Largest value usable as an RNG seed (fits in a signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 24  # output frame rate; also used for duration -> frame-count conversion
MIN_FRAMES_MODEL = 8   # minimum clip length supported by the model, in frames
MAX_FRAMES_MODEL = 96  # maximum clip length supported by the model, in frames
# Duration bounds (seconds, rounded to 0.1) implied by the frame limits at FIXED_FPS.
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
def resize_image(image: Image.Image) -> Image.Image:
    """Crop and scale *image* to the model's target resolution.

    Landscape (or square) inputs are resized directly. Portrait inputs are
    rotated 90 degrees into landscape orientation, resized, and rotated back,
    so the output keeps the original orientation.
    """
    if image.height <= image.width:
        return resize_image_landscape(image)
    rotated = image.transpose(Image.Transpose.ROTATE_90)
    return resize_image_landscape(rotated).transpose(Image.Transpose.ROTATE_270)
def resize_image_landscape(image: Image.Image) -> Image.Image:
    """Center-crop *image* to the target aspect ratio, then scale it to
    exactly (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT) with Lanczos resampling."""
    target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
    width, height = image.size
    if width / height > target_aspect:
        # Too wide: trim equal amounts from the left and right edges.
        crop_width = round(height * target_aspect)
        offset = (width - crop_width) // 2
        cropped = image.crop((offset, 0, offset + crop_width, height))
    else:
        # Too tall (or exact): trim equal amounts from the top and bottom edges.
        crop_height = round(width / target_aspect)
        offset = (height - crop_height) // 2
        cropped = image.crop((0, offset, width, offset + crop_height))
    return cropped.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
# Load the distilled LTX-Video condition pipeline in bfloat16 on the GPU.
pipe = LTXConditionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to("cuda")

# Warm-up / optimization pass with a dummy single-frame condition so real calls
# below don't pay the one-time optimization cost.  The dummy frame is round-
# tripped through a video file (export_to_video -> load_video) to produce the
# video object format the pipeline's condition expects.
dummy_image = Image.new("RGB", (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT))
video = load_video(export_to_video([dummy_image]))
condition1 = LTXVideoCondition(video=video, frame_index=0)
optimize_pipeline_(
    pipe,
    conditions=[condition1],
    prompt="prompt",
    negative_prompt="prompt",
    height=LANDSCAPE_HEIGHT,
    width=LANDSCAPE_WIDTH,
    num_frames=MAX_FRAMES_MODEL,  # optimize for the largest frame count used
    num_inference_steps=2,  # minimal steps — only shapes matter for warm-up
    guidance_scale=1.0,
)
# Default prompts for image-to-video generation.
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
# Conditioning image for the generation run.
input_image = load_image("peng.png")
duration_seconds = MAX_DURATION  # generate the longest clip the model supports
guidance_scale = 1.0
# Convert the requested duration to a frame count, clamped to the model's limits.
num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
current_seed = 42  # fixed seed for reproducible output
resized_image = resize_image(input_image)
steps = 8  # the distilled model needs only a few inference steps
# Wrap the single conditioning frame as a one-frame video condition, round-
# tripped through a video file to match the format the pipeline expects.
video = load_video(export_to_video([resized_image]))
condition1 = LTXVideoCondition(video=video, frame_index=0)
# Run image-to-video generation and take the frames of the first (only) clip.
output_frames_list = pipe(
    conditions=[condition1],
    prompt=default_prompt_i2v,
    negative_prompt=default_negative_prompt,
    height=resized_image.height,
    width=resized_image.width,
    num_frames=num_frames,
    guidance_scale=float(guidance_scale),
    num_inference_steps=int(steps),
    # Seeded CUDA generator for reproducible sampling.
    generator=torch.Generator(device="cuda").manual_seed(current_seed),
).frames[0]
# Write the generated frames to disk.  Use FIXED_FPS rather than a hard-coded
# 24 so the file's frame rate always matches the duration -> num_frames
# computation above.  (Stray trailing "|" from the original removed — it was
# a syntax error.)
export_to_video(output_frames_list, "output_original.mp4", fps=FIXED_FPS)