import os, torch, tempfile
import gradio as gr
from diffusers import LTXPipeline, AutoModel
from diffusers.hooks import apply_group_offloading
from diffusers.utils import export_to_video
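
# Assumed dependencies: a recent diffusers release (roughly 0.33+, where
# AutoModel, enable_group_offload and enable_layerwise_casting are available),
# gradio, and a PyTorch build with float8 support for the fp8 path.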

# --------- λͺ¨λΈ λ‘œλ“œ ν•¨μˆ˜ ---------
def load_pipeline(device="cuda"):
    # bf16 on GPU; fp16 is poorly supported on CPU, so fall back to fp32 there
    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

    transformer = AutoModel.from_pretrained(
        "Lightricks/LTX-Video",
        subfolder="transformer",
        torch_dtype=dtype,
        trust_remote_code=True,   # important: prevents a placeholder model from being loaded
        variant="bf16" if dtype==torch.bfloat16 else None
    )

    # fp8 layerwise casting (skipped if the environment doesn't support it):
    # weights are stored as float8_e4m3fn and upcast to `dtype` per layer at
    # compute time, roughly halving transformer weight memory
    try:
        transformer.enable_layerwise_casting(
            storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
        )
        fp8 = True
    except Exception:
        fp8 = False

    pipe = LTXPipeline.from_pretrained(
        "Lightricks/LTX-Video",
        transformer=transformer,
        torch_dtype=dtype,
        trust_remote_code=True,
        variant="bf16" if dtype==torch.bfloat16 else None
    )

    # group offloading (skipped if unsupported): weights stay on the CPU and
    # groups of layers are streamed onto the GPU only while they execute. The
    # hooks manage device placement themselves, so the pipeline must not be
    # moved to the GPU beforehand.
    try:
        onload_device = torch.device(device)
        offload_device = torch.device("cpu")
        pipe.transformer.enable_group_offload(
            onload_device=onload_device,
            offload_device=offload_device,
            offload_type="leaf_level",
            use_stream=True
        )
        apply_group_offloading(pipe.text_encoder, onload_device=onload_device,
                               offload_type="block_level", num_blocks_per_group=2)
        apply_group_offloading(pipe.vae, onload_device=onload_device,
                               offload_type="leaf_level")
        offload = True
    except Exception:
        offload = False

    # no offloading available: keep the whole pipeline on the target device
    if not offload:
        pipe.to(device)

    return pipe, fp8, offload

PIPE, FP8_OK, OFFLOAD_OK = load_pipeline("cuda" if torch.cuda.is_available() else "cpu")
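
# The pipeline is built once at import time and shared across requests; the
# Gradio queue enabled below processes events sequentially by default, so a
# single shared pipeline instance is safe here.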

# --------- Video generation ---------
def generate(prompt, negative_prompt,
             width, height, num_frames, fps,
             decode_timestep, decode_noise_scale,
             steps, seed):

    # seed >= 0 makes generation reproducible; a negative seed stays random
    g = None
    if seed is not None and seed >= 0:
        g = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(int(seed))

    with torch.inference_mode():
        result = PIPE(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            width=int(width),                # Gradio sliders may deliver floats
            height=int(height),
            num_frames=int(num_frames),
            frame_rate=int(fps),             # LTXPipeline takes frame_rate, not fps
            decode_timestep=decode_timestep,
            decode_noise_scale=decode_noise_scale,
            num_inference_steps=int(steps),
            generator=g
        )
        frames = result.frames[0]

    # write the frames to a temporary mp4 that Gradio can serve
    tmpdir = tempfile.mkdtemp()
    save_path = os.path.join(tmpdir, "output.mp4")
    export_to_video(frames, save_path, fps=int(fps))
    return save_path, f"FP8: {'ON' if FP8_OK else 'OFF'} | Offloading: {'ON' if OFFLOAD_OK else 'OFF'}"

# --------- Gradio UI ---------
with gr.Blocks(title="LTX-Video Gradio") as demo:
    gr.Markdown("## 🎬 LTX-Video Gradio Demo")

    with gr.Row():
        prompt_in = gr.Textbox(label="Prompt", lines=6, value="A cinematic close-up of a smiling woman under warm sunset light.")
        neg_in = gr.Textbox(label="Negative Prompt", lines=4, value="worst quality, inconsistent motion, blurry, jittery, distorted")

    with gr.Row():
        # LTX-Video requires width/height divisible by 32 (spatial VAE factor)
        width_in  = gr.Slider(256, 1024, step=32, value=768, label="Width")
        height_in = gr.Slider(256, 1024, step=32, value=512, label="Height")

    with gr.Row():
        # the temporal VAE compresses in chunks of 8, so num_frames should be 8k+1
        frames_in = gr.Slider(17, 241, step=8, value=65, label="Frames (num_frames)")
        fps_in    = gr.Slider(8, 30, step=1, value=24, label="FPS")

    with gr.Row():
        dt_in  = gr.Slider(0.0, 0.2, step=0.001, value=0.03, label="decode_timestep")
        dns_in = gr.Slider(0.0, 0.2, step=0.001, value=0.025, label="decode_noise_scale")
        steps_in = gr.Slider(10, 75, step=1, value=40, label="Inference Steps")
        seed_in  = gr.Number(value=-1, label="Seed (>=0 for a fixed seed)")

    btn = gr.Button("πŸŽ₯ Generate Video", variant="primary")
    video_out = gr.Video(label="Output", autoplay=True)
    info_out = gr.Markdown()

    btn.click(fn=generate,
              inputs=[prompt_in, neg_in, width_in, height_in,
                      frames_in, fps_in, dt_in, dns_in, steps_in, seed_in],
              outputs=[video_out, info_out])

demo.queue().launch()
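
# Run with `python app.py` and open the printed local URL. Inside a container
# or on a remote host, demo.queue().launch(server_name="0.0.0.0") is a common
# alternative (assumes the default port 7860 is reachable).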