Update app.py
app.py (CHANGED)
@@ -7,9 +7,6 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
 
 import sys
 import json
-import uuid
-import time
-import shutil
 import base64
 import random
 import tempfile
@@ -43,30 +40,15 @@ WATERMARK_NOTE = "Made with ❤️ by bilsimaging.com"
 # ZeroGPU limit (<=120)
 GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
 
-# Globals
+# Globals (NO CUDA INIT HERE)
 _model_dict = None
 _cfg = None
 _device: Optional[torch.device] = None
 
 
 # ------------
-# Small helpers
+# Small helpers (CPU-only; avoid touching CUDA here)
 # ------------
-def _setup_device(pref: str = "auto", gpu_id: int = 0) -> torch.device:
-    """Pick CUDA if available, else MPS, else CPU."""
-    if pref == "auto":
-        if torch.cuda.is_available():
-            d = torch.device(f"cuda:{gpu_id}")
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            d = torch.device("mps")
-        else:
-            d = torch.device("cpu")
-    else:
-        d = torch.device(pref)
-    logger.info(f"Using {d}")
-    return d
-
-
 def _ensure_repo() -> None:
     """Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
     if REPO_DIR.exists():
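The deleted _setup_device helper probed torch.cuda.is_available() in the main process; on ZeroGPU that can initialize CUDA outside a GPU task, which is exactly what this commit removes. A minimal sketch of the pattern the diff moves to, assuming the spaces package Hugging Face provides on ZeroGPU hardware (names here are illustrative, not app.py's):

    import torch
    import spaces  # ZeroGPU helper package available on Hugging Face Spaces

    _device = None  # module scope stays CPU-only: no torch.cuda.* probing here

    @spaces.GPU(duration=110)  # a GPU is attached only while this function runs
    def gpu_task() -> str:
        global _device
        _device = torch.device("cuda:0")  # safe: we are inside a GPU task
        return f"running on {_device}"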
@@ -105,26 +87,30 @@ def prepare_once() -> None:
 # -----------------------
 # Model load & inference
 # -----------------------
-def auto_load_models() -> str:
+def auto_load_models(device: Optional[torch.device] = None) -> str:
     """
-    Load HunyuanVideo-Foley + encoders on the
-
+    Load HunyuanVideo-Foley + encoders on the given device.
+    MUST be called only inside a @spaces.GPU context with device=cuda:0.
     """
     global _model_dict, _cfg, _device
 
     if _model_dict is not None and _cfg is not None:
         return "✅ Model already loaded."
 
-    #
-
+    # DO NOT probe CUDA here unless device is passed from GPU context
+    if device is None:
+        return "❌ Load the model inside a GPU task first (use the Load button or run Generate)."
+
+    os.environ["HF_PREFER_SAFETENSORS"] = "1"  # enforce again for safety
 
     sys.path.append(str(REPO_DIR))
     from hunyuanvideo_foley.utils.model_utils import load_model
 
-    _device =
+    _device = device
     logger.info("Loading HunyuanVideo-Foley model...")
     logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
     logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
+    logger.info(f"TARGET_DEVICE: {_device}")
 
     try:
         _model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
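With the guard in place, auto_load_models has a simple contract: callers outside a GPU task get a refusal string instead of a CUDA crash. A hedged sketch of both branches, assuming app.py's auto_load_models is in scope:

    import torch

    # Without a device (e.g., called from the main process) the loader refuses:
    msg = auto_load_models()
    assert msg.startswith("❌")

    # Inside a @spaces.GPU task, pass the attached GPU explicitly:
    msg = auto_load_models(torch.device("cuda:0"))
    if not msg.startswith("✅"):
        raise RuntimeError(msg)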
@@ -222,9 +208,12 @@ def infer_single_video(
     Generate Foley audio for an uploaded video (1–6 variants).
     Returns: (list of output video paths, status message)
     """
-    #
+    # Safe: inside GPU context, we can use CUDA
+    device = torch.device("cuda:0")
+
+    # Lazy-load if needed on GPU
     if _model_dict is None or _cfg is None:
-        msg = auto_load_models()
+        msg = auto_load_models(device)
         if not str(msg).startswith("✅"):
             return [], f"❌ {msg}"
 
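For context, infer_single_video runs under @spaces.GPU (the decorator sits above this hunk), which is why pinning cuda:0 here is safe. A hedged sketch of that overall shape, with illustrative names:

    import spaces
    import torch

    @spaces.GPU(duration=110)
    def infer(video_path: str, prompt: str) -> str:
        device = torch.device("cuda:0")  # safe inside the GPU task
        msg = auto_load_models(device)   # lazy first-call load (app.py function)
        if not msg.startswith("✅"):
            return f"❌ {msg}"
        return "✅ model ready; denoising would run here"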
@@ -261,8 +250,15 @@ def infer_single_video(
     return outs, f"✅ Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
 
 
+# Separate GPU task to preload model (used by the Load button & API)
+@spaces.GPU(duration=GPU_DURATION)
+def gpu_load_models() -> str:
+    device = torch.device("cuda:0")
+    return auto_load_models(device)
+
+
 # -------------
-# Gradio UI (with MCP+
+# Gradio UI (with MCP + REST endpoints)
 # -------------
 def _about_html() -> str:
     return f"""
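The dedicated preload task lets the Space pay the weight-loading cost before the first generation request. A short usage sketch, relying on the short-circuit in auto_load_models shown earlier:

    status = gpu_load_models()  # first call: loads weights on the attached GPU
    print(status)
    print(gpu_load_models())    # later calls return "✅ Model already loaded."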
@@ -292,8 +288,7 @@ def _about_html() -> str:
     <p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see “API & MCP” tab).
     Perfect for pipelines and tools like <b>n8n</b>.</p>
 
-
-    <p>Each output writes a JSON sidecar including: <i>{WATERMARK_NOTE}</i>. Ask if you want a visible overlay.</p>
+
     </div>
     """
 
@@ -307,6 +302,7 @@ def create_ui() -> gr.Blocks:
     .generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
     .minor-btn button{ border-radius:10px;}
     .muted{ color:#64748b; }
+    .footer-text{ margin-top:16px; text-align:center; color:#475569; font-size:.95rem;}
     """
     with gr.Blocks(title="ShortiFoley — HunyuanVideo-Foley", css=css) as demo:
 
@@ -367,12 +363,13 @@ def create_ui() -> gr.Blocks:
             api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
         )
 
+        # Load model (GPU-safe)
         load_btn.click(
-            fn=auto_load_models,
+            fn=gpu_load_models,
             inputs=[],
             outputs=[status],
             api_name="/load_model",
-            api_description="Load/initialize the ShortiFoley model and encoders."
+            api_description="Load/initialize the ShortiFoley model and encoders (runs on GPU)."
        )
 
        # Toggle visibility based on variants
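Since the click handler registers api_name="/load_model", the same action is callable remotely. A hedged sketch with gradio_client; the Space id is a placeholder:

    from gradio_client import Client

    client = Client("bilsimaging/ShortiFoley")     # placeholder Space id
    print(client.predict(api_name="/load_model"))  # status string from gpu_load_models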
@@ -403,8 +400,7 @@ def create_ui() -> gr.Blocks:
 
         # Refresh via button
         refresh.click(_refresh_gallery, outputs=[gallery])
-
-        # Also refresh after generation finishes (chain on the event, NOT the button)
+        # Also refresh after generation finishes
         gen_evt.then(_refresh_gallery, inputs=None, outputs=[gallery])
 
         with gr.Tab("API & MCP"):
@@ -430,6 +426,13 @@ def create_ui() -> gr.Blocks:
         with gr.Tab("ℹ️ About"):
             gr.HTML(_about_html())
 
+        # Footer
+        gr.HTML("""
+        <div class="footer-text">
+            <p>🚀 Created by <b>bilsimaging.com</b> • Powered by HunyuanVideo-Foley • Generate high-quality audio from video and text descriptions</p>
+        </div>
+        """)
+
         # ---- REST + MCP endpoints (inside Blocks) ----
         def _download_to_tmp(url: str) -> str:
             try:
@@ -469,10 +472,9 @@ def create_ui() -> gr.Blocks:
             num_inference_steps: int = 50,
             sample_nums: int = 1,
         ) -> Dict[str, List[str]]:
+            # Ensure model is ready (GPU-safe path)
             if _model_dict is None or _cfg is None:
-                msg = auto_load_models()
-                if not str(msg).startswith("✅"):
-                    raise RuntimeError(msg)
+                _ = gpu_load_models()
             local = _normalize_video_input(video_url_or_b64)
             outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
             return {"videos": outs, "message": msg}
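This endpoint is also reachable over REST/MCP via gradio_client. A hedged client sketch; the Space id and endpoint name are placeholders, since the function's def line sits above this hunk:

    from gradio_client import Client

    client = Client("bilsimaging/ShortiFoley")  # placeholder Space id
    result = client.predict(
        "https://example.com/clip.mp4",         # video_url_or_b64
        "rain on a tin roof, distant thunder",  # text_prompt
        4.5,                                    # guidance_scale
        50,                                     # num_inference_steps
        1,                                      # sample_nums
        api_name="/generate_foley",             # assumed endpoint name
    )
    print(result["message"], result["videos"])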
@@ -480,14 +482,14 @@ def create_ui() -> gr.Blocks:
         @gr.api
         def load_model_tool() -> str:
             """Ensure model is loaded on server (convenient for MCP/REST)."""
-            return auto_load_models()
+            return gpu_load_models()
 
         @gr.mcp.resource("shortifoley://status")
         def shortifoley_status() -> str:
             """Return a simple readiness string for MCP clients."""
             ready = _model_dict is not None and _cfg is not None
             dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
-            return f"ShortiFoley status: {'ready' if ready else '
+            return f"ShortiFoley status: {'ready' if ready else 'idle'} | device={dev} | outputs={OUTPUTS_DIR}"
 
         @gr.mcp.prompt()
         def foley_prompt(name: str = "default") -> str:
@@ -497,9 +499,8 @@ def create_ui() -> gr.Blocks:
                 "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
             )
 
-        #
-        demo.load(
-        demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])
+        # IMPORTANT: Do NOT auto-load models here to avoid CUDA init in main process
+        demo.load(lambda: "Ready. Click 'Load model' or 'Generate' to start.", inputs=None, outputs=None)
 
     return demo
 
@@ -511,7 +512,7 @@ def set_seeds(s: int = 1):
 
 
 # -------------
-# App bootstrap
+# App bootstrap (CPU only)
 # -------------
 if __name__ == "__main__":
     logger.remove()
@@ -521,7 +522,7 @@ if __name__ == "__main__":
     logger.info("===== Application Startup =====\n")
     prepare_once()
 
-    # Probe imports (early surfacing)
+    # Probe imports (early surfacing) — CPU-safe
    sys.path.append(str(REPO_DIR))
    try:
        from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process  # noqa: F401
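Tying the bootstrap together: a hedged sketch of a CPU-only __main__ consistent with this hunk. prepare_once and create_ui are app.py's functions; mcp_server=True is an assumption based on the "API & MCP" tab (supported in recent Gradio releases):

    # No model or CUDA work happens here; the main process stays CPU-only.
    if __name__ == "__main__":
        prepare_once()        # clone repo + fetch weights (CPU)
        demo = create_ui()
        demo.launch(mcp_server=True)  # assumption: MCP enabled at launch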
|