englissi committed
Commit d0237b3 · verified · 1 Parent(s): d9bde98

Update app.py

Files changed (1)
  1. app.py +79 -81
app.py CHANGED
@@ -7,106 +7,105 @@ from diffusers import LTXPipeline, AutoModel
 from diffusers.hooks import apply_group_offloading
 from diffusers.utils import export_to_video

-# -------------------------------------------------------------------
-# Environment dependencies:
-# pip install -U torch torchvision accelerate transformers diffusers safetensors sentencepiece gradio imageio imageio-ffmpeg
-# (on Spaces/Docker the ffmpeg binary may be required: apt-get update && apt-get install -y ffmpeg)
-# -------------------------------------------------------------------
-
-def load_pipeline(device: str = "cuda"):
-    """
-    Load the LTX-Video pipeline:
-    - requires sentencepiece (T5 tokenizer)
-    - trust_remote_code=True (avoids the Placeholder issue)
-    - enable bf16/FP8/offloading only where available
-    """
+# --------------------------------------------
+# Required packages (Spaces):
+# requirements.txt:
+#   torch>=2.2
+#   torchvision>=0.17
+#   accelerate>=0.28.0
+#   transformers>=4.40.0
+#   diffusers>=0.31.0
+#   safetensors>=0.4.2
+#   sentencepiece>=0.2.0
+#   gradio>=4.32.0
+#   imageio>=2.34.0
+#   imageio-ffmpeg>=0.4.9
+# packages.txt:
+#   ffmpeg
+# --------------------------------------------
+
+def load_pipeline():
     use_cuda = torch.cuda.is_available()
     device = "cuda" if use_cuda else "cpu"
-    dtype = torch.bfloat16 if use_cuda else torch.float16  # bf16 only makes sense on CUDA
+    # CPU cannot run float16/float8 -> fall back to float32
+    dtype = torch.bfloat16 if use_cuda else torch.float32

-    # 1) Load the transformer
     transformer = AutoModel.from_pretrained(
         "Lightricks/LTX-Video",
         subfolder="transformer",
         torch_dtype=dtype,
+        # LTXPipeline ignores trust_remote_code, but passing it is harmless
         trust_remote_code=True,
-        variant="bf16" if dtype == torch.bfloat16 else None
+        variant="bf16" if (use_cuda and dtype == torch.bfloat16) else None,
     )

-    # 2) FP8 layerwise casting (only where supported)
+    # Attempt FP8 only where supported
     fp8_ok = False
-    try:
-        transformer.enable_layerwise_casting(
-            storage_dtype=torch.float8_e4m3fn,
-            compute_dtype=dtype
-        )
-        fp8_ok = True
-    except Exception:
-        fp8_ok = False  # silently skip on unsupported environments
+    if use_cuda:
+        try:
+            transformer.enable_layerwise_casting(
+                storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
+            )
+            fp8_ok = True
+        except Exception:
+            fp8_ok = False

-    # 3) Load the pipeline
     pipe = LTXPipeline.from_pretrained(
         "Lightricks/LTX-Video",
         transformer=transformer,
         torch_dtype=dtype,
         trust_remote_code=True,
-        variant="bf16" if dtype == torch.bfloat16 else None
+        variant="bf16" if (use_cuda and dtype == torch.bfloat16) else None,
     ).to(device)

-    # 4) Group offloading (only where supported)
     offload_ok = False
-    try:
-        onload_device = torch.device(device)
-        offload_device = torch.device("cpu")
-        pipe.transformer.enable_group_offload(
-            onload_device=onload_device,
-            offload_device=offload_device,
-            offload_type="leaf_level",
-            use_stream=True
-        )
-        apply_group_offloading(
-            pipe.text_encoder,
-            onload_device=onload_device,
-            offload_type="block_level",
-            num_blocks_per_group=2
-        )
-        apply_group_offloading(
-            pipe.vae,
-            onload_device=onload_device,
-            offload_type="leaf_level"
-        )
-        offload_ok = True
-    except Exception:
-        offload_ok = False
+    if use_cuda:
+        try:
+            onload_device = torch.device(device)
+            offload_device = torch.device("cpu")
+            pipe.transformer.enable_group_offload(
+                onload_device=onload_device,
+                offload_device=offload_device,
+                offload_type="leaf_level",
+                use_stream=True,
+            )
+            apply_group_offloading(
+                pipe.text_encoder,
+                onload_device=onload_device,
+                offload_type="block_level",
+                num_blocks_per_group=2,
+            )
+            apply_group_offloading(
+                pipe.vae,
+                onload_device=onload_device,
+                offload_type="leaf_level",
+            )
+            offload_ok = True
+        except Exception:
+            offload_ok = False

     return pipe, fp8_ok, offload_ok, device


 PIPE, FP8_OK, OFFLOAD_OK, DEVICE = load_pipeline()

-def _to_uint8_frames(frames):
-    """
-    Safely convert a (T,H,W,C) float/torch tensor to uint8 numpy
-    """
-    import numpy as np

+def _to_uint8_frames(frames):
+    # Safely convert (T,H,W,C) torch/float frames to numpy uint8
     if isinstance(frames, torch.Tensor):
         frames = frames.detach().to("cpu").numpy()

-    if frames.ndim == 3:
-        # (T,H,W) -> (T,H,W,1)
+    if frames.ndim == 3:  # (T,H,W) → (T,H,W,1)
         frames = frames[..., None]

     assert frames.ndim == 4, f"Unexpected frames shape: {frames.shape}"

     if frames.dtype != np.uint8:
-        # scale for a 0-1 or 0-255 input range
-        mx = frames.max()
+        mx = float(frames.max() if frames.size else 1.0)
         if mx <= 1.0:
             frames = (np.clip(frames, 0, 1) * 255).astype(np.uint8)
         else:
             frames = np.clip(frames, 0, 255).astype(np.uint8)
-
     return frames


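Note on the hunk above: the rewritten `_to_uint8_frames` drops the local `import numpy as np`, so the new file relies on a module-level numpy import (presumably in the unchanged lines 1-6, which this diff does not show). The conversion logic itself can be sanity-checked without the model; a minimal standalone check, assuming only numpy and torch are installed:

```python
import numpy as np
import torch


def _to_uint8_frames(frames):
    # Standalone copy of the helper above, for a quick smoke test.
    if isinstance(frames, torch.Tensor):
        frames = frames.detach().to("cpu").numpy()
    if frames.ndim == 3:  # (T,H,W) -> (T,H,W,1)
        frames = frames[..., None]
    assert frames.ndim == 4, f"Unexpected frames shape: {frames.shape}"
    if frames.dtype != np.uint8:
        mx = float(frames.max() if frames.size else 1.0)
        if mx <= 1.0:
            frames = (np.clip(frames, 0, 1) * 255).astype(np.uint8)
        else:
            frames = np.clip(frames, 0, 255).astype(np.uint8)
    return frames


# Float input in [0, 1] is rescaled to 0..255.
a = _to_uint8_frames(torch.rand(4, 8, 8, 3))
assert a.dtype == np.uint8 and a.shape == (4, 8, 8, 3)

# Float input already above 1.0 is clipped, not rescaled.
b = _to_uint8_frames(np.full((4, 8, 8, 3), 300.0))
assert b.max() == 255

# Grayscale (T,H,W) input gets a channel axis appended.
c = _to_uint8_frames(np.zeros((4, 8, 8), dtype=np.float32))
assert c.shape == (4, 8, 8, 1)
```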
@@ -118,15 +117,14 @@ def generate_video(
 ):
     # seed
     g = None
-    if seed is not None:
-        try:
-            s = int(seed)
-            if s >= 0:
-                g = torch.Generator(device=DEVICE).manual_seed(s)
-        except Exception:
-            pass
+    try:
+        s = int(seed)
+        if s >= 0:
+            g = torch.Generator(device=DEVICE).manual_seed(s)
+    except Exception:
+        pass

-    # inference
+    # -------- Inference --------
     with torch.inference_mode():
         out = PIPE(
             prompt=(prompt or "").strip(),
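The seed hunk replaces the `if seed is not None` guard with a bare try/except, which also tolerates the empty strings and floats a Gradio number field can emit. The same pattern as a standalone sketch (the `make_generator` name is made up here):

```python
import torch


def make_generator(seed, device="cpu"):
    # Accept None, "", floats, or numeric strings; return None for
    # anything unusable so the pipeline falls back to a random seed.
    try:
        s = int(seed)
        if s >= 0:
            return torch.Generator(device=device).manual_seed(s)
    except (TypeError, ValueError):
        pass
    return None


g1 = make_generator(42)
g2 = make_generator("42")
assert torch.equal(torch.rand(3, generator=g1), torch.rand(3, generator=g2))
assert make_generator(None) is None and make_generator(-1) is None
```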
@@ -134,40 +132,40 @@
         width=int(width),
         height=int(height),
         num_frames=int(num_frames),
-        fps=int(fps),
+        # ★ LTXPipeline takes no fps argument.
         decode_timestep=float(decode_timestep),
         decode_noise_scale=float(decode_noise_scale),
         num_inference_steps=int(steps),
-        generator=g
+        generator=g,
     )
-    frames = out.frames[0]  # expected: (T, H, W, C) float / torch
+    frames = out.frames[0]

-    # convert frames to a safe format
     frames = _to_uint8_frames(frames)

-    # save path
+    # -------- Save --------
     tmpdir = tempfile.mkdtemp()
     save_path = os.path.join(tmpdir, "output.mp4")
+    target_fps = int(fps)

-    # first choice: diffusers' built-in saver
+    # Prefer the diffusers saver
     try:
-        export_to_video(frames, save_path, fps=int(fps))
+        export_to_video(frames, save_path, fps=target_fps)
     except Exception:
         # fallback: imageio-ffmpeg
         import imageio.v3 as iio
-        iio.imwrite(save_path, frames, fps=int(fps), codec="libx264")
+        iio.imwrite(save_path, frames, fps=target_fps, codec="libx264")

     info = (
         f"FP8: {'ON' if FP8_OK else 'OFF'} | "
         f"Offloading: {'ON' if OFFLOAD_OK else 'OFF'} | "
         f"Device: {DEVICE} | "
-        f"Frames: {frames.shape} | FPS: {int(fps)}"
+        f"Frames: {frames.shape} | FPS: {target_fps}"
     )
     return save_path, info


-# --------------------------- Gradio UI ---------------------------
-with gr.Blocks(title="LTX-Video Gradio") as demo:
+# ----------------------------- Gradio UI -----------------------------
+with gr.Blocks(title="LTX-Video — Prompt to Short Video") as demo:
     gr.Markdown("## 🎬 LTX-Video — Prompt to Short Video")

     with gr.Row():
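The essential fix in the hunk above is dropping `fps=` from the `PIPE(...)` call, since `LTXPipeline.__call__` does not accept it; fps now only matters when the file is encoded. A more defensive variant (a sketch, not what the app does; `filter_call_kwargs` is hypothetical) would filter kwargs against the pipeline's signature instead of hard-coding the removal:

```python
import inspect


def filter_call_kwargs(pipe, **kwargs):
    # Keep only the kwargs that pipe.__call__ actually declares.
    accepted = set(inspect.signature(pipe.__call__).parameters)
    dropped = sorted(k for k in kwargs if k not in accepted)
    if dropped:
        print(f"dropping unsupported kwargs: {dropped}")
    return {k: v for k, v in kwargs.items() if k in accepted}


# Hypothetical usage with the module-level PIPE:
#   out = PIPE(**filter_call_kwargs(PIPE, prompt="a fox", num_frames=65, fps=24))
# 'fps' would be dropped before the call instead of raising a TypeError.
```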
@@ -188,7 +186,7 @@ with gr.Blocks(title="LTX-Video Gradio") as demo:

     with gr.Row():
         frames_in = gr.Slider(17, 241, value=65, step=2, label="num_frames")
-        fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS")
+        fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS (save only)")

     with gr.Row():
         dt_in = gr.Slider(0.0, 0.2, value=0.03, step=0.001, label="decode_timestep")
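The "FPS (save only)" relabel matches that fix: fps no longer influences sampling, only encoding, so for a fixed `num_frames` it simply trades clip duration against smoothness:

```python
# fps is applied only when the video is written, so the clip length is
# num_frames / fps. With the slider defaults:
num_frames, fps = 65, 24
print(f"duration = {num_frames / fps:.2f} s")  # 2.71 s
```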
 
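Because the FP8 and offloading branches are now gated on `use_cuda` and wrapped in try/except, the resulting flags depend on the host. A quick probe (a sketch; `probe_environment` is made up) reports which branches could activate, without downloading any weights:

```python
import torch


def probe_environment():
    # Mirrors the conditions load_pipeline() checks, minus the model itself.
    use_cuda = torch.cuda.is_available()
    report = {
        "cuda": use_cuda,
        "bf16": use_cuda and torch.cuda.is_bf16_supported(),
        # The float8 storage dtype ships with torch >= 2.1.
        "float8_storage": hasattr(torch, "float8_e4m3fn"),
    }
    if use_cuda:
        props = torch.cuda.get_device_properties(0)
        report["vram_gb"] = round(props.total_memory / 1024**3, 1)
    return report


print(probe_environment())
```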