import numpy as np
import torch
from PIL import Image

from diffusers import LTXConditionPipeline
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
from diffusers.utils import export_to_video, load_image, load_video

from optimization import optimize_pipeline_
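
# Note: `optimization` is a local module bundled with the Space, not part of
# diffusers; optimize_pipeline_ is assumed to compile/warm up the pipeline for
# the argument shapes passed to it.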

MODEL_ID = "Lightricks/LTX-Video-0.9.8-13B-distilled"

# Target landscape resolution. The original values (480x832) were swapped
# relative to the names (a landscape frame is wider than it is tall), so they
# are corrected here; both dimensions remain multiples of 32 as LTX expects.
LANDSCAPE_WIDTH = 832
LANDSCAPE_HEIGHT = 480

MAX_SEED = np.iinfo(np.int32).max

FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 96
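
# Duration bounds in seconds, derived from the frame limits at the fixed FPS:
# round(8 / 24, 1) = 0.3 s minimum, round(96 / 24, 1) = 4.0 s maximum.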
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
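

# Crop-and-resize helpers: portrait inputs are rotated to landscape,
# center-cropped to the target aspect ratio, resized, and rotated back.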
def resize_image(image: Image.Image) -> Image.Image:
    if image.height > image.width:
        transposed = image.transpose(Image.Transpose.ROTATE_90)
        resized = resize_image_landscape(transposed)
        return resized.transpose(Image.Transpose.ROTATE_270)
    return resize_image_landscape(image)


def resize_image_landscape(image: Image.Image) -> Image.Image:
    target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
    width, height = image.size
    in_aspect = width / height
    if in_aspect > target_aspect:
        # Input is too wide: crop the left and right edges.
        new_width = round(height * target_aspect)
        left = (width - new_width) // 2
        image = image.crop((left, 0, left + new_width, height))
    else:
        # Input is too tall: crop the top and bottom edges.
        new_height = round(width / target_aspect)
        top = (height - new_height) // 2
        image = image.crop((0, top, width, top + new_height))
    return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
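

# Load the distilled LTX-Video pipeline in bfloat16 on the GPU.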
pipe = LTXConditionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to("cuda")
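
# Warm-up pass: a dummy single-frame condition at the maximum frame count lets
# optimize_pipeline_ do its compilation once up front, presumably so the real
# generation call below reuses the already-optimized shapes.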
dummy_image = Image.new("RGB", (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT))
# Round-trip through a temporary video file to get the list-of-frames format
# that LTXVideoCondition expects.
video = load_video(export_to_video([dummy_image]))
condition1 = LTXVideoCondition(video=video, frame_index=0)

optimize_pipeline_(
    pipe,
    conditions=[condition1],
    prompt="prompt",
    negative_prompt="prompt",
    height=LANDSCAPE_HEIGHT,
    width=LANDSCAPE_WIDTH,
    num_frames=MAX_FRAMES_MODEL,
    num_inference_steps=2,
    guidance_scale=1.0,
)
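
# Default prompts: the negative prompt is the usual quality/artifact blacklist
# seen in video-generation demos.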
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
default_negative_prompt = (
    "Bright tones, overexposed, static, blurred details, subtitles, style, works, "
    "paintings, images, static, overall gray, worst quality, low quality, JPEG "
    "compression residue, ugly, incomplete, extra fingers, poorly drawn hands, "
    "poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, "
    "still picture, messy background, three legs, many people in the background, "
    "walking backwards, watermark, text, signature"
)
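
# Per-generation settings: the requested duration (seconds) is converted to a
# frame count and clamped to the model's supported range.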
input_image = load_image("peng.png")
duration_seconds = MAX_DURATION
guidance_scale = 1.0
num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
current_seed = 42
resized_image = resize_image(input_image)
steps = 8
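
# Wrap the resized still image as a one-frame conditioning video at frame 0.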
video = load_video(export_to_video([resized_image]))
condition1 = LTXVideoCondition(video=video, frame_index=0)
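
# Image-to-video generation. guidance_scale=1.0 disables classifier-free
# guidance, which matches how the distilled checkpoint is meant to be run.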
output_frames_list = pipe(
    conditions=[condition1],
    prompt=default_prompt_i2v,
    negative_prompt=default_negative_prompt,
    height=resized_image.height,
    width=resized_image.width,
    num_frames=num_frames,
    guidance_scale=float(guidance_scale),
    num_inference_steps=int(steps),
    generator=torch.Generator(device="cuda").manual_seed(current_seed),
).frames[0]

export_to_video(output_frames_list, "output_original.mp4", fps=FIXED_FPS)
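
# With duration_seconds = MAX_DURATION this clamps to 96 frames, i.e. a
# four-second clip at 24 fps animating the input image.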