Update app.py
app.py CHANGED
@@ -7,106 +7,105 @@ from diffusers import LTXPipeline, AutoModel
 from diffusers.hooks import apply_group_offloading
 from diffusers.utils import export_to_video
 
-#
-#
-#
-#
-#
+# --------------------------------------------
+# Required packages (Spaces):
+# requirements.txt:
+#   torch>=2.2
+#   torchvision>=0.17
+#   accelerate>=0.28.0
+#   transformers>=4.40.0
+#   diffusers>=0.31.0
+#   safetensors>=0.4.2
+#   sentencepiece>=0.2.0
+#   gradio>=4.32.0
+#   imageio>=2.34.0
+#   imageio-ffmpeg>=0.4.9
+# packages.txt:
+#   ffmpeg
+# --------------------------------------------
+
+def load_pipeline():
     use_cuda = torch.cuda.is_available()
     device = "cuda" if use_cuda else "cpu"
+    # CPU can't do float16/float8 → fall back to float32
+    dtype = torch.bfloat16 if use_cuda else torch.float32
 
-    # 1) Load the transformer
     transformer = AutoModel.from_pretrained(
         "Lightricks/LTX-Video",
         subfolder="transformer",
         torch_dtype=dtype,
+        # LTXPipeline ignores trust_remote_code, but passing it is harmless
         trust_remote_code=True,
-        variant="bf16" if dtype == torch.bfloat16 else None
+        variant="bf16" if (use_cuda and dtype == torch.bfloat16) else None,
     )
 
-    #
+    # Attempt FP8 only when it can work
     fp8_ok = False
+    if use_cuda:
+        try:
+            transformer.enable_layerwise_casting(
+                storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
+            )
+            fp8_ok = True
+        except Exception:
+            fp8_ok = False
 
-    # 3) Load the pipeline
     pipe = LTXPipeline.from_pretrained(
         "Lightricks/LTX-Video",
         transformer=transformer,
         torch_dtype=dtype,
         trust_remote_code=True,
-        variant="bf16" if dtype == torch.bfloat16 else None
+        variant="bf16" if (use_cuda and dtype == torch.bfloat16) else None,
     ).to(device)
 
-    # 4) Group offloading (only when possible)
     offload_ok = False
+    if use_cuda:
+        try:
+            onload_device = torch.device(device)
+            offload_device = torch.device("cpu")
+            pipe.transformer.enable_group_offload(
+                onload_device=onload_device,
+                offload_device=offload_device,
+                offload_type="leaf_level",
+                use_stream=True,
+            )
+            apply_group_offloading(
+                pipe.text_encoder,
+                onload_device=onload_device,
+                offload_type="block_level",
+                num_blocks_per_group=2,
+            )
+            apply_group_offloading(
+                pipe.vae,
+                onload_device=onload_device,
+                offload_type="leaf_level",
+            )
+            offload_ok = True
+        except Exception:
+            offload_ok = False
 
     return pipe, fp8_ok, offload_ok, device
 
 
 PIPE, FP8_OK, OFFLOAD_OK, DEVICE = load_pipeline()
 
 
-def _to_uint8_frames(frames):
-    """
-    Safely convert a (T,H,W,C) float/torch tensor to uint8 numpy
-    """
-    import numpy as np
+def _to_uint8_frames(frames):
+    # Safely convert (T,H,W,C) torch/float frames to numpy uint8
     if isinstance(frames, torch.Tensor):
         frames = frames.detach().to("cpu").numpy()
 
-    if frames.ndim == 3:
-        # (T,H,W) -> (T,H,W,1)
+    if frames.ndim == 3:  # (T,H,W) → (T,H,W,1)
         frames = frames[..., None]
 
     assert frames.ndim == 4, f"Unexpected frames shape: {frames.shape}"
 
     if frames.dtype != np.uint8:
-        mx = frames.max()
+        mx = float(frames.max() if frames.size else 1.0)
         if mx <= 1.0:
             frames = (np.clip(frames, 0, 1) * 255).astype(np.uint8)
         else:
             frames = np.clip(frames, 0, 255).astype(np.uint8)
     return frames
 
 
@@ -118,15 +117,14 @@ def generate_video(
 ):
     # Seed
     g = None
-        pass
+    try:
+        s = int(seed)
+        if s >= 0:
+            g = torch.Generator(device=DEVICE).manual_seed(s)
+    except Exception:
+        pass
 
-    # Inference
+    # -------- Inference --------
     with torch.inference_mode():
         out = PIPE(
             prompt=(prompt or "").strip(),
@@ -134,40 +132,40 @@
             width=int(width),
             height=int(height),
             num_frames=int(num_frames),
-            fps
+            # ★ LTXPipeline takes no fps argument.
             decode_timestep=float(decode_timestep),
             decode_noise_scale=float(decode_noise_scale),
             num_inference_steps=int(steps),
-            generator=g
+            generator=g,
         )
-        frames = out.frames[0]
+    frames = out.frames[0]
 
-    # Convert frames to a safe format
     frames = _to_uint8_frames(frames)
 
-    # Save
+    # -------- Save --------
     tmpdir = tempfile.mkdtemp()
     save_path = os.path.join(tmpdir, "output.mp4")
+    target_fps = int(fps)
 
-    #
+    # Prefer the diffusers saver
     try:
-        export_to_video(frames, save_path, fps=
+        export_to_video(frames, save_path, fps=target_fps)
     except Exception:
         # Fallback: imageio-ffmpeg
         import imageio.v3 as iio
-        iio.imwrite(save_path, frames, fps=
+        iio.imwrite(save_path, frames, fps=target_fps, codec="libx264")
 
     info = (
         f"FP8: {'ON' if FP8_OK else 'OFF'} | "
         f"Offloading: {'ON' if OFFLOAD_OK else 'OFF'} | "
         f"Device: {DEVICE} | "
-        f"Frames: {frames.shape} | FPS: {
+        f"Frames: {frames.shape} | FPS: {target_fps}"
     )
     return save_path, info
 
 
-#
-with gr.Blocks(title="LTX-Video Gradio") as demo:
+# ----------------------------- Gradio UI -----------------------------
+with gr.Blocks(title="LTX-Video — Prompt to Short Video") as demo:
     gr.Markdown("## 🎬 LTX-Video — Prompt to Short Video")
 
     with gr.Row():
@@ -188,7 +186,7 @@ with gr.Blocks(title="LTX-Video Gradio") as demo:
 
     with gr.Row():
         frames_in = gr.Slider(17, 241, value=65, step=2, label="num_frames")
-        fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS")
+        fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS (save only)")
 
     with gr.Row():
         dt_in = gr.Slider(0.0, 0.2, value=0.03, step=0.001, label="decode_timestep")
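Note on the FP8 guard added above: enable_layerwise_casting ships only in newer diffusers releases, while requirements.txt pins only diffusers>=0.31.0, so the try/except keeps the Space alive when the method is missing. A minimal standalone sketch of the same call, assuming a CUDA machine and a diffusers version that has ModelMixin.enable_layerwise_casting:

import torch
from diffusers import AutoModel

transformer = AutoModel.from_pretrained(
    "Lightricks/LTX-Video", subfolder="transformer", torch_dtype=torch.bfloat16
)
# Weights are stored as float8_e4m3fn; each layer upcasts to bfloat16 right
# before it computes, roughly halving the transformer's VRAM footprint.
transformer.enable_layerwise_casting(
    storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
)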
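The offloading block reaches the same diffusers hook through two entry points: the enable_group_offload method on the transformer, and the apply_group_offloading function for the text encoder and VAE (the latter calls omit offload_device, leaning on its CPU default). A standalone sketch of the function form, assuming a CUDA device:

import torch
from diffusers import AutoModel
from diffusers.hooks import apply_group_offloading

model = AutoModel.from_pretrained(
    "Lightricks/LTX-Video", subfolder="transformer", torch_dtype=torch.bfloat16
)
# "leaf_level" offloads each leaf module on its own (lowest peak VRAM, most
# transfers); "block_level" moves num_blocks_per_group blocks at a time.
apply_group_offloading(
    model,
    onload_device=torch.device("cuda"),
    offload_device=torch.device("cpu"),
    offload_type="leaf_level",
    use_stream=True,  # prefetch the next group on a separate CUDA stream
)
# Parameters now rest on the CPU and are streamed to the GPU per group during
# forward, so the module must not be moved wholesale with .to("cuda") afterwards.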
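_to_uint8_frames now guards the empty-array case (the frames.size check) before probing max(), and the 0-1 versus 0-255 branch keeps both float conventions working. A quick usage sketch that exercises the helper without running the pipeline (shapes are illustrative):

import numpy as np
import torch

# Float frames in [0, 1], shaped (T, H, W, C) like pipeline output
frames = _to_uint8_frames(torch.rand(8, 64, 64, 3))
assert frames.dtype == np.uint8 and frames.shape == (8, 64, 64, 3)

# A grayscale (T, H, W) input gains a trailing channel axis
gray = _to_uint8_frames(np.random.rand(8, 64, 64))
assert gray.shape == (8, 64, 64, 1)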
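Seeding is now opt-in: a seed that fails int() or is negative leaves g = None, so the pipeline draws fresh noise. A sketch of the reproducibility this buys, assuming the PIPE and DEVICE globals defined above (prompt and step count are illustrative):

import torch

def make_gen(seed: int) -> torch.Generator:
    return torch.Generator(device=DEVICE).manual_seed(seed)

# Same seed and settings produce the same initial latents, hence the same video
out_a = PIPE(prompt="a red fox in snow", num_inference_steps=8, generator=make_gen(42))
out_b = PIPE(prompt="a red fox in snow", num_inference_steps=8, generator=make_gen(42))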
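The diff cuts off before the button wiring, so for orientation here is a hypothetical sketch of how sliders like these are typically bound to a generate function in gr.Blocks; the component names and the stub handler are illustrative, not part of the commit:

import gradio as gr

def fake_generate(prompt, seed, num_frames, fps):
    # Stand-in for generate_video, which returns (video_path, info_string)
    return None, f"prompt={prompt!r} seed={seed} frames={num_frames} fps={fps}"

with gr.Blocks(title="wiring sketch") as demo:
    prompt_in = gr.Textbox(label="Prompt")
    seed_in = gr.Number(value=-1, label="Seed (-1 = random)")
    frames_in = gr.Slider(17, 241, value=65, step=2, label="num_frames")
    fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS (save only)")
    video_out = gr.Video(label="Result")
    info_out = gr.Textbox(label="Run info")
    gr.Button("Generate").click(
        fake_generate,
        inputs=[prompt_in, seed_in, frames_in, fps_in],
        outputs=[video_out, info_out],
    )

demo.launch()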