import os
import tempfile

import torch
import gradio as gr
from diffusers import LTXPipeline, AutoModel
from diffusers.hooks import apply_group_offloading
from diffusers.utils import export_to_video
# --------- Model loading ---------
def load_pipeline(device="cuda"):
    # bf16 on GPU; fall back to fp32 on CPU (fp16 CPU inference is poorly supported)
    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
    transformer = AutoModel.from_pretrained(
        "Lightricks/LTX-Video",
        subfolder="transformer",
        torch_dtype=dtype,
        trust_remote_code=True,  # important: avoids loading a placeholder class
        variant="bf16" if dtype == torch.bfloat16 else None,
    )
    # FP8 layerwise casting: store weights in float8 and upcast per layer at
    # compute time (skipped if the environment lacks float8 support)
    try:
        transformer.enable_layerwise_casting(
            storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
        )
        fp8 = True
    except Exception:
        fp8 = False
    pipe = LTXPipeline.from_pretrained(
        "Lightricks/LTX-Video",
        transformer=transformer,
        torch_dtype=dtype,
        trust_remote_code=True,
        variant="bf16" if dtype == torch.bfloat16 else None,
    ).to(device)
    # Group offloading: keep only the currently active group of layers on the
    # GPU and stream the rest from CPU (skipped if not supported)
    try:
        onload_device = torch.device(device)
        offload_device = torch.device("cpu")
        pipe.transformer.enable_group_offload(
            onload_device=onload_device,
            offload_device=offload_device,
            offload_type="leaf_level",
            use_stream=True,
        )
        apply_group_offloading(
            pipe.text_encoder,
            onload_device=onload_device,
            offload_device=offload_device,
            offload_type="block_level",
            num_blocks_per_group=2,
        )
        apply_group_offloading(
            pipe.vae,
            onload_device=onload_device,
            offload_device=offload_device,
            offload_type="leaf_level",
        )
        offload = True
    except Exception:
        offload = False
    return pipe, fp8, offload
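
# Optional, illustrative helper (not required by the app): reports peak VRAM so
# the effect of fp8 casting / group offloading can be checked after a run.
def vram_report():
    if not torch.cuda.is_available():
        return "CPU mode - no VRAM stats"
    peak_gib = torch.cuda.max_memory_allocated() / 1024**3
    return f"Peak VRAM: {peak_gib:.2f} GiB"
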
PIPE, FP8_OK, OFFLOAD_OK = load_pipeline("cuda" if torch.cuda.is_available() else "cpu")
# --------- Video generation ---------
def generate(prompt, negative_prompt,
             width, height, num_frames, fps,
             decode_timestep, decode_noise_scale,
             steps, seed):
    g = None
    if seed is not None and seed >= 0:
        seed_device = "cuda" if torch.cuda.is_available() else "cpu"
        g = torch.Generator(device=seed_device).manual_seed(int(seed))
    with torch.inference_mode():
        result = PIPE(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            width=width,
            height=height,
            num_frames=num_frames,
            frame_rate=fps,  # LTXPipeline's kwarg is frame_rate, not fps
            decode_timestep=decode_timestep,
            decode_noise_scale=decode_noise_scale,
            num_inference_steps=steps,
            generator=g,
        )
    frames = result.frames[0]
    tmpdir = tempfile.mkdtemp()
    save_path = os.path.join(tmpdir, "output.mp4")
    export_to_video(frames, save_path, fps=fps)
    return save_path, f"FP8: {'ON' if FP8_OK else 'OFF'} | Offloading: {'ON' if OFFLOAD_OK else 'OFF'}"
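
# Illustrative smoke test (assumed example values; needs a CUDA GPU with enough
# memory). Uncomment to run a single generation without the UI:
# path, info = generate("A red fox running through fresh snow", None,
#                       512, 512, 65, 24, 0.03, 0.025, 30, 42)
# print(path, info, vram_report())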
# --------- Gradio UI ---------
with gr.Blocks(title="LTX-Video Gradio") as demo:
    gr.Markdown("## 🎬 LTX-Video Gradio Demo")
    with gr.Row():
        prompt_in = gr.Textbox(label="Prompt", lines=6, value="A cinematic close-up of a smiling woman under warm sunset light.")
        neg_in = gr.Textbox(label="Negative Prompt", lines=4, value="worst quality, inconsistent motion, blurry, jittery, distorted")
    with gr.Row():
        # LTX-Video expects spatial dimensions divisible by 32
        width_in = gr.Slider(256, 1024, step=32, value=768, label="Width")
        height_in = gr.Slider(256, 1024, step=32, value=512, label="Height")
    with gr.Row():
        # num_frames should be 8k + 1 (17, 25, ..., 241); step=8 from 17 keeps this
        frames_in = gr.Slider(17, 241, step=8, value=65, label="Frames (num_frames)")
        fps_in = gr.Slider(8, 30, step=1, value=24, label="FPS")
    with gr.Row():
        dt_in = gr.Slider(0.0, 0.2, step=0.001, value=0.03, label="decode_timestep")
        dns_in = gr.Slider(0.0, 0.2, step=0.001, value=0.025, label="decode_noise_scale")
        steps_in = gr.Slider(10, 75, step=1, value=40, label="Inference Steps")
        seed_in = gr.Number(value=-1, precision=0, label="Seed (>= 0 to fix)")
    btn = gr.Button("🎥 Generate Video", variant="primary")
    video_out = gr.Video(label="Output", autoplay=True)
    info_out = gr.Markdown()
    btn.click(fn=generate,
              inputs=[prompt_in, neg_in, width_in, height_in,
                      frames_in, fps_in, dt_in, dns_in, steps_in, seed_in],
              outputs=[video_out, info_out])

demo.queue().launch()
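
# Run with `python app.py`; Gradio serves on http://127.0.0.1:7860 by default.
# queue() keeps long generations from hitting HTTP request timeouts, which
# matters for multi-minute video renders.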