# videogenerator/app.py
import os, torch, tempfile
import gradio as gr
from diffusers import LTXPipeline, AutoModel
from diffusers.hooks import apply_group_offloading
from diffusers.utils import export_to_video
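
# Memory-saving strategy (applied best-effort below): FP8 layerwise weight
# casting on the transformer, plus group offloading of the transformer, text
# encoder, and VAE between the accelerator and CPU. Both are optional
# diffusers features and are skipped silently when the environment lacks them.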
# --------- λͺ¨λΈ λ‘œλ“œ ν•¨μˆ˜ ---------
def load_pipeline(device="cuda"):
dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float16
transformer = AutoModel.from_pretrained(
"Lightricks/LTX-Video",
subfolder="transformer",
torch_dtype=dtype,
trust_remote_code=True, # μ€‘μš”: Placeholder λ°©μ§€
variant="bf16" if dtype==torch.bfloat16 else None
)
# fp8 layerwise casting (ν™˜κ²½ λ―Έμ§€μ›μ‹œ λ¬΄μ‹œ)
try:
transformer.enable_layerwise_casting(
storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
)
fp8 = True
except Exception:
fp8 = False
    pipe = LTXPipeline.from_pretrained(
        "Lightricks/LTX-Video",
        transformer=transformer,
        torch_dtype=dtype,
        trust_remote_code=True,
        variant="bf16" if dtype == torch.bfloat16 else None,
    ).to(device)
    # Group offloading (ignored if unsupported)
    try:
        onload_device = torch.device(device)
        offload_device = torch.device("cpu")
        pipe.transformer.enable_group_offload(
            onload_device=onload_device,
            offload_device=offload_device,
            offload_type="leaf_level",
            use_stream=True,
        )
        apply_group_offloading(pipe.text_encoder, onload_device=onload_device,
                               offload_type="block_level", num_blocks_per_group=2)
        apply_group_offloading(pipe.vae, onload_device=onload_device,
                               offload_type="leaf_level")
        offload = True
    except Exception:
        offload = False
    return pipe, fp8, offload
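
# Load once at import time; the pipeline is shared by every Gradio request.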
PIPE, FP8_OK, OFFLOAD_OK = load_pipeline("cuda" if torch.cuda.is_available() else "cpu")
# --------- Video generation ---------
def generate(prompt, negative_prompt,
             width, height, num_frames, fps,
             decode_timestep, decode_noise_scale,
             steps, seed):
    # A seed >= 0 pins the generator for reproducible results
    g = None
    if seed is not None and seed >= 0:
        g = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(int(seed))
    with torch.inference_mode():
        result = PIPE(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            width=int(width),
            height=int(height),
            num_frames=int(num_frames),
            frame_rate=int(fps),  # LTXPipeline takes frame_rate; fps is reused for export below
            decode_timestep=decode_timestep,
            decode_noise_scale=decode_noise_scale,
            num_inference_steps=int(steps),
            generator=g,
        )
    frames = result.frames[0]
    tmpdir = tempfile.mkdtemp()
    save_path = os.path.join(tmpdir, "output.mp4")
    export_to_video(frames, save_path, fps=int(fps))
    return save_path, f"FP8: {'ON' if FP8_OK else 'OFF'} | Offloading: {'ON' if OFFLOAD_OK else 'OFF'}"
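
# Quick smoke test outside the UI (hypothetical values; uncomment to run):
#   path, info = generate("A red fox running through snow", "", 512, 512, 65, 24,
#                         0.03, 0.025, 30, 42)
#   print(path, info)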
# --------- Gradio UI ---------
with gr.Blocks(title="LTX-Video Gradio") as demo:
    gr.Markdown("## 🎬 LTX-Video Gradio Demo")
    with gr.Row():
        prompt_in = gr.Textbox(label="Prompt", lines=6, value="A cinematic close-up of a smiling woman under warm sunset light.")
        neg_in = gr.Textbox(label="Negative Prompt", lines=4, value="worst quality, inconsistent motion, blurry, jittery, distorted")
    with gr.Row():
        # LTX-Video expects resolutions divisible by 32
        width_in = gr.Slider(256, 1024, step=32, value=768, label="Width")
        height_in = gr.Slider(256, 1024, step=32, value=512, label="Height")
    with gr.Row():
        # num_frames should have the form 8k + 1 (e.g., 65, 161)
        frames_in = gr.Slider(17, 241, step=8, value=65, label="Frames (num_frames)")
        fps_in = gr.Slider(8, 30, step=1, value=24, label="FPS")
    with gr.Row():
        dt_in = gr.Slider(0.0, 0.2, step=0.001, value=0.03, label="decode_timestep")
        dns_in = gr.Slider(0.0, 0.2, step=0.001, value=0.025, label="decode_noise_scale")
    steps_in = gr.Slider(10, 75, step=1, value=40, label="Inference Steps")
    seed_in = gr.Number(value=-1, label="Seed (>= 0 to fix)")
    btn = gr.Button("🎥 Generate Video", variant="primary")
    video_out = gr.Video(label="Output", autoplay=True)
    info_out = gr.Markdown()
    btn.click(fn=generate,
              inputs=[prompt_in, neg_in, width_in, height_in,
                      frames_in, fps_in, dt_in, dns_in, steps_in, seed_in],
              outputs=[video_out, info_out])

demo.queue().launch()