englissi committed
Commit d0237b3 · verified · 1 Parent(s): d9bde98

Update app.py

Files changed (1)
  1. app.py +79 -81
app.py CHANGED
@@ -7,106 +7,105 @@ from diffusers import LTXPipeline, AutoModel
 from diffusers.hooks import apply_group_offloading
 from diffusers.utils import export_to_video

-# -------------------------------------------------------------------
-# Environment dependencies:
-# pip install -U torch torchvision accelerate transformers diffusers safetensors sentencepiece gradio imageio imageio-ffmpeg
-# (on Spaces/Docker the ffmpeg binary may be required: apt-get update && apt-get install -y ffmpeg)
-# -------------------------------------------------------------------
-
-def load_pipeline(device: str = "cuda"):
-    """
-    Load the LTX-Video pipeline:
-    - requires sentencepiece (T5 tokenizer)
-    - trust_remote_code=True (avoids the Placeholder issue)
-    - enable bf16/FP8/offloading only where available
-    """
+# --------------------------------------------
+# Required packages (Spaces):
+# requirements.txt:
+#   torch>=2.2
+#   torchvision>=0.17
+#   accelerate>=0.28.0
+#   transformers>=4.40.0
+#   diffusers>=0.31.0
+#   safetensors>=0.4.2
+#   sentencepiece>=0.2.0
+#   gradio>=4.32.0
+#   imageio>=2.34.0
+#   imageio-ffmpeg>=0.4.9
+# packages.txt:
+#   ffmpeg
+# --------------------------------------------
+
+def load_pipeline():
     use_cuda = torch.cuda.is_available()
     device = "cuda" if use_cuda else "cpu"
-    dtype = torch.bfloat16 if use_cuda else torch.float16  # bf16 only makes sense on CUDA
+    # CPU cannot run float16/float8 -> fall back to float32
+    dtype = torch.bfloat16 if use_cuda else torch.float32

-    # 1) Load the transformer
     transformer = AutoModel.from_pretrained(
         "Lightricks/LTX-Video",
         subfolder="transformer",
         torch_dtype=dtype,
+        # LTXPipeline ignores trust_remote_code, but passing it is harmless
         trust_remote_code=True,
-        variant="bf16" if dtype == torch.bfloat16 else None
+        variant="bf16" if (use_cuda and dtype == torch.bfloat16) else None,
     )

-    # 2) FP8 layerwise casting (only where supported)
+    # Attempt FP8 only where supported
     fp8_ok = False
-    try:
-        transformer.enable_layerwise_casting(
-            storage_dtype=torch.float8_e4m3fn,
-            compute_dtype=dtype
-        )
-        fp8_ok = True
-    except Exception:
-        fp8_ok = False  # silently skip on unsupported environments
+    if use_cuda:
+        try:
+            transformer.enable_layerwise_casting(
+                storage_dtype=torch.float8_e4m3fn, compute_dtype=dtype
+            )
+            fp8_ok = True
+        except Exception:
+            fp8_ok = False

-    # 3) Load the pipeline
     pipe = LTXPipeline.from_pretrained(
         "Lightricks/LTX-Video",
         transformer=transformer,
         torch_dtype=dtype,
         trust_remote_code=True,
-        variant="bf16" if dtype == torch.bfloat16 else None
+        variant="bf16" if (use_cuda and dtype == torch.bfloat16) else None,
     ).to(device)

-    # 4) Group offloading (only where supported)
     offload_ok = False
-    try:
-        onload_device = torch.device(device)
-        offload_device = torch.device("cpu")
-        pipe.transformer.enable_group_offload(
-            onload_device=onload_device,
-            offload_device=offload_device,
-            offload_type="leaf_level",
-            use_stream=True
-        )
-        apply_group_offloading(
-            pipe.text_encoder,
-            onload_device=onload_device,
-            offload_type="block_level",
-            num_blocks_per_group=2
-        )
-        apply_group_offloading(
-            pipe.vae,
-            onload_device=onload_device,
-            offload_type="leaf_level"
-        )
-        offload_ok = True
-    except Exception:
-        offload_ok = False
+    if use_cuda:
+        try:
+            onload_device = torch.device(device)
+            offload_device = torch.device("cpu")
+            pipe.transformer.enable_group_offload(
+                onload_device=onload_device,
+                offload_device=offload_device,
+                offload_type="leaf_level",
+                use_stream=True,
+            )
+            apply_group_offloading(
+                pipe.text_encoder,
+                onload_device=onload_device,
+                offload_type="block_level",
+                num_blocks_per_group=2,
+            )
+            apply_group_offloading(
+                pipe.vae,
+                onload_device=onload_device,
+                offload_type="leaf_level",
+            )
+            offload_ok = True
+        except Exception:
+            offload_ok = False

     return pipe, fp8_ok, offload_ok, device


 PIPE, FP8_OK, OFFLOAD_OK, DEVICE = load_pipeline()

-def _to_uint8_frames(frames):
-    """
-    Safely convert a (T,H,W,C) float/torch tensor to uint8 numpy
-    """
-    import numpy as np

+def _to_uint8_frames(frames):
+    # Safely convert (T,H,W,C) torch/float frames to numpy uint8
     if isinstance(frames, torch.Tensor):
         frames = frames.detach().to("cpu").numpy()

-    if frames.ndim == 3:
-        # (T,H,W) -> (T,H,W,1)
+    if frames.ndim == 3:  # (T,H,W) → (T,H,W,1)
         frames = frames[..., None]

     assert frames.ndim == 4, f"Unexpected frames shape: {frames.shape}"

     if frames.dtype != np.uint8:
-        # scale for a 0-1 or 0-255 input range
-        mx = frames.max()
+        mx = float(frames.max() if frames.size else 1.0)
         if mx <= 1.0:
             frames = (np.clip(frames, 0, 1) * 255).astype(np.uint8)
         else:
             frames = np.clip(frames, 0, 255).astype(np.uint8)
-
     return frames


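Note on the hunk above: the rewritten `_to_uint8_frames` drops the local `import numpy as np`, so the new file relies on a module-level numpy import (presumably in the unchanged lines 1-6, which this diff does not show). The conversion logic itself can be sanity-checked without the model; a minimal standalone check, assuming only numpy and torch are installed:

```python
import numpy as np
import torch


def _to_uint8_frames(frames):
    # Standalone copy of the helper above, for a quick smoke test.
    if isinstance(frames, torch.Tensor):
        frames = frames.detach().to("cpu").numpy()
    if frames.ndim == 3:  # (T,H,W) -> (T,H,W,1)
        frames = frames[..., None]
    assert frames.ndim == 4, f"Unexpected frames shape: {frames.shape}"
    if frames.dtype != np.uint8:
        mx = float(frames.max() if frames.size else 1.0)
        if mx <= 1.0:
            frames = (np.clip(frames, 0, 1) * 255).astype(np.uint8)
        else:
            frames = np.clip(frames, 0, 255).astype(np.uint8)
    return frames


# Float input in [0, 1] is rescaled to 0..255.
a = _to_uint8_frames(torch.rand(4, 8, 8, 3))
assert a.dtype == np.uint8 and a.shape == (4, 8, 8, 3)

# Float input already above 1.0 is clipped, not rescaled.
b = _to_uint8_frames(np.full((4, 8, 8, 3), 300.0))
assert b.max() == 255

# Grayscale (T,H,W) input gets a channel axis appended.
c = _to_uint8_frames(np.zeros((4, 8, 8), dtype=np.float32))
assert c.shape == (4, 8, 8, 1)
```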
@@ -118,15 +117,14 @@ def generate_video(
 ):
     # seed
     g = None
-    if seed is not None:
-        try:
-            s = int(seed)
-            if s >= 0:
-                g = torch.Generator(device=DEVICE).manual_seed(s)
-        except Exception:
-            pass
+    try:
+        s = int(seed)
+        if s >= 0:
+            g = torch.Generator(device=DEVICE).manual_seed(s)
+    except Exception:
+        pass

-    # inference
+    # -------- Inference --------
     with torch.inference_mode():
         out = PIPE(
             prompt=(prompt or "").strip(),
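The seed hunk replaces the `if seed is not None` guard with a bare try/except, which also tolerates the empty strings and floats a Gradio number field can emit. The same pattern as a standalone sketch (the `make_generator` name is made up here):

```python
import torch


def make_generator(seed, device="cpu"):
    # Accept None, "", floats, or numeric strings; return None for
    # anything unusable so the pipeline falls back to a random seed.
    try:
        s = int(seed)
        if s >= 0:
            return torch.Generator(device=device).manual_seed(s)
    except (TypeError, ValueError):
        pass
    return None


g1 = make_generator(42)
g2 = make_generator("42")
assert torch.equal(torch.rand(3, generator=g1), torch.rand(3, generator=g2))
assert make_generator(None) is None and make_generator(-1) is None
```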
@@ -134,40 +132,40 @@
         width=int(width),
         height=int(height),
         num_frames=int(num_frames),
-        fps=int(fps),
+        # ★ LTXPipeline takes no fps argument.
         decode_timestep=float(decode_timestep),
         decode_noise_scale=float(decode_noise_scale),
         num_inference_steps=int(steps),
-        generator=g
+        generator=g,
     )
-    frames = out.frames[0]  # expected: (T, H, W, C) float / torch
+    frames = out.frames[0]

-    # convert frames to a safe format
     frames = _to_uint8_frames(frames)

-    # save path
+    # -------- Save --------
     tmpdir = tempfile.mkdtemp()
     save_path = os.path.join(tmpdir, "output.mp4")
+    target_fps = int(fps)

-    # first choice: diffusers' built-in saver
+    # Prefer the diffusers saver
     try:
-        export_to_video(frames, save_path, fps=int(fps))
+        export_to_video(frames, save_path, fps=target_fps)
     except Exception:
         # fallback: imageio-ffmpeg
         import imageio.v3 as iio
-        iio.imwrite(save_path, frames, fps=int(fps), codec="libx264")
+        iio.imwrite(save_path, frames, fps=target_fps, codec="libx264")

     info = (
         f"FP8: {'ON' if FP8_OK else 'OFF'} | "
         f"Offloading: {'ON' if OFFLOAD_OK else 'OFF'} | "
         f"Device: {DEVICE} | "
-        f"Frames: {frames.shape} | FPS: {int(fps)}"
+        f"Frames: {frames.shape} | FPS: {target_fps}"
     )
     return save_path, info


-# --------------------------- Gradio UI ---------------------------
-with gr.Blocks(title="LTX-Video Gradio") as demo:
+# ----------------------------- Gradio UI -----------------------------
+with gr.Blocks(title="LTX-Video — Prompt to Short Video") as demo:
     gr.Markdown("## 🎬 LTX-Video — Prompt to Short Video")

     with gr.Row():
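The essential fix in the hunk above is dropping `fps=` from the `PIPE(...)` call, since `LTXPipeline.__call__` does not accept it; fps now only matters when the file is encoded. A more defensive variant (a sketch, not what the app does; `filter_call_kwargs` is hypothetical) would filter kwargs against the pipeline's signature instead of hard-coding the removal:

```python
import inspect


def filter_call_kwargs(pipe, **kwargs):
    # Keep only the kwargs that pipe.__call__ actually declares.
    accepted = set(inspect.signature(pipe.__call__).parameters)
    dropped = sorted(k for k in kwargs if k not in accepted)
    if dropped:
        print(f"dropping unsupported kwargs: {dropped}")
    return {k: v for k, v in kwargs.items() if k in accepted}


# Hypothetical usage with the module-level PIPE:
#   out = PIPE(**filter_call_kwargs(PIPE, prompt="a fox", num_frames=65, fps=24))
# 'fps' would be dropped before the call instead of raising a TypeError.
```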
@@ -188,7 +186,7 @@ with gr.Blocks(title="LTX-Video Gradio") as demo:

     with gr.Row():
         frames_in = gr.Slider(17, 241, value=65, step=2, label="num_frames")
-        fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS")
+        fps_in = gr.Slider(8, 30, value=24, step=1, label="FPS (save only)")

     with gr.Row():
         dt_in = gr.Slider(0.0, 0.2, value=0.03, step=0.001, label="decode_timestep")
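The "FPS (save only)" relabel matches that fix: fps no longer influences sampling, only encoding, so for a fixed `num_frames` it simply trades clip duration against smoothness:

```python
# fps is applied only when the video is written, so the clip length is
# num_frames / fps. With the slider defaults:
num_frames, fps = 65, 24
print(f"duration = {num_frames / fps:.2f} s")  # 2.71 s
```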
 
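Because the FP8 and offloading branches are now gated on `use_cuda` and wrapped in try/except, the resulting flags depend on the host. A quick probe (a sketch; `probe_environment` is made up) reports which branches could activate, without downloading any weights:

```python
import torch


def probe_environment():
    # Mirrors the conditions load_pipeline() checks, minus the model itself.
    use_cuda = torch.cuda.is_available()
    report = {
        "cuda": use_cuda,
        "bf16": use_cuda and torch.cuda.is_bf16_supported(),
        # The float8 storage dtype ships with torch >= 2.1.
        "float8_storage": hasattr(torch, "float8_e4m3fn"),
    }
    if use_cuda:
        props = torch.cuda.get_device_properties(0)
        report["vram_gb"] = round(props.total_memory / 1024**3, 1)
    return report


print(probe_environment())
```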