import os, torch, tempfile
import gradio as gr
from diffusers import LTXPipeline, AutoModel
from diffusers.hooks import apply_group_offloading
from diffusers.utils import export_to_video
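
# Assumed dependencies: a recent diffusers release (roughly 0.33+, where
# AutoModel, enable_group_offload and enable_layerwise_casting are available),
# gradio, and a PyTorch build with float8 support for the fp8 path.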

# --------- λͺ¨λΈ λ‘œλ“œ ν•¨μˆ˜ ---------
def load_pipeline(device="cuda"):
    # bf16 on GPU; fp16 is poorly supported on CPU, so fall back to fp32 there
    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

    transformer = AutoModel.from_pretrained(
        "Lightricks/LTX-Video",
        subfolder="transformer",
        torch_dtype=dtype,
        trust_remote_code=True,   # important: prevents a placeholder model from being loaded
        variant="bf16" if dtype==torch.bfloat16 else None
    )

    # fp8 layerwise casting (skipped if the environment doesn't support it):
    # weights are stored as float8_e4m3fn and upcast to `dtype` per layer at
    # compute time, roughly halving transformer weight memory
    try:
        transformer.enable_layerwise_casting(
            storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
        )
        fp8 = True
    except Exception:
        fp8 = False

    pipe = LTXPipeline.from_pretrained(
        "Lightricks/LTX-Video",
        transformer=transformer,
        torch_dtype=dtype,
        trust_remote_code=True,
        variant="bf16" if dtype==torch.bfloat16 else None
    )

    # group offloading (skipped if unsupported): weights stay on the CPU and
    # groups of layers are streamed onto the GPU only while they execute. The
    # hooks manage device placement themselves, so the pipeline must not be
    # moved to the GPU beforehand.
    try:
        onload_device = torch.device(device)
        offload_device = torch.device("cpu")
        pipe.transformer.enable_group_offload(
            onload_device=onload_device,
            offload_device=offload_device,
            offload_type="leaf_level",
            use_stream=True
        )
        apply_group_offloading(pipe.text_encoder, onload_device=onload_device,
                               offload_type="block_level", num_blocks_per_group=2)
        apply_group_offloading(pipe.vae, onload_device=onload_device,
                               offload_type="leaf_level")
        offload = True
    except Exception:
        offload = False

    # no offloading available: keep the whole pipeline on the target device
    if not offload:
        pipe.to(device)

    return pipe, fp8, offload

PIPE, FP8_OK, OFFLOAD_OK = load_pipeline("cuda" if torch.cuda.is_available() else "cpu")
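
# The pipeline is built once at import time and shared across requests; the
# Gradio queue enabled below processes events sequentially by default, so a
# single shared pipeline instance is safe here.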

# --------- Video generation ---------
def generate(prompt, negative_prompt,
             width, height, num_frames, fps,
             decode_timestep, decode_noise_scale,
             steps, seed):

    # seed >= 0 makes generation reproducible; a negative seed stays random
    g = None
    if seed is not None and seed >= 0:
        g = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(int(seed))

    with torch.inference_mode():
        result = PIPE(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            width=int(width),                # Gradio sliders may deliver floats
            height=int(height),
            num_frames=int(num_frames),
            frame_rate=int(fps),             # LTXPipeline takes frame_rate, not fps
            decode_timestep=decode_timestep,
            decode_noise_scale=decode_noise_scale,
            num_inference_steps=int(steps),
            generator=g
        )
        frames = result.frames[0]

    # write the frames to a temporary mp4 that Gradio can serve
    tmpdir = tempfile.mkdtemp()
    save_path = os.path.join(tmpdir, "output.mp4")
    export_to_video(frames, save_path, fps=int(fps))
    return save_path, f"FP8: {'ON' if FP8_OK else 'OFF'} | Offloading: {'ON' if OFFLOAD_OK else 'OFF'}"

# --------- Gradio UI ---------
with gr.Blocks(title="LTX-Video Gradio") as demo:
    gr.Markdown("## 🎬 LTX-Video Gradio Demo")

    with gr.Row():
        prompt_in = gr.Textbox(label="Prompt", lines=6, value="A cinematic close-up of a smiling woman under warm sunset light.")
        neg_in = gr.Textbox(label="Negative Prompt", lines=4, value="worst quality, inconsistent motion, blurry, jittery, distorted")

    with gr.Row():
        # LTX-Video requires width/height divisible by 32 (spatial VAE factor)
        width_in  = gr.Slider(256, 1024, step=32, value=768, label="Width")
        height_in = gr.Slider(256, 1024, step=32, value=512, label="Height")

    with gr.Row():
        # the temporal VAE compresses in chunks of 8, so num_frames should be 8k+1
        frames_in = gr.Slider(17, 241, step=8, value=65, label="Frames (num_frames)")
        fps_in    = gr.Slider(8, 30, step=1, value=24, label="FPS")

    with gr.Row():
        dt_in  = gr.Slider(0.0, 0.2, step=0.001, value=0.03, label="decode_timestep")
        dns_in = gr.Slider(0.0, 0.2, step=0.001, value=0.025, label="decode_noise_scale")
        steps_in = gr.Slider(10, 75, step=1, value=40, label="Inference Steps")
        seed_in  = gr.Number(value=-1, label="Seed (>=0 for a fixed seed)")

    btn = gr.Button("πŸŽ₯ Generate Video", variant="primary")
    video_out = gr.Video(label="Output", autoplay=True)
    info_out = gr.Markdown()

    btn.click(fn=generate,
              inputs=[prompt_in, neg_in, width_in, height_in,
                      frames_in, fps_in, dt_in, dns_in, steps_in, seed_in],
              outputs=[video_out, info_out])

demo.queue().launch()
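
# Run with `python app.py` and open the printed local URL. Inside a container
# or on a remote host, demo.queue().launch(server_name="0.0.0.0") is a common
# alternative (assumes the default port 7860 is reachable).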