Bils committed on
Commit 03653a8 · verified · 1 Parent(s): 7797ff1

Update app.py

Files changed (1)
app.py +102 -102
app.py CHANGED
@@ -7,6 +7,9 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")

import sys
import json
+ import uuid
+ import time
+ import shutil
import base64
import random
import tempfile
@@ -22,6 +25,7 @@ from loguru import logger
from huggingface_hub import snapshot_download
import spaces  # HF Spaces ZeroGPU & MCP integration

+
# -------------------------
# Constants & configuration
# -------------------------
@@ -141,7 +145,7 @@ def auto_load_models() -> str:


def _merge_audio_video(audio_path: str, video_path: str, out_path: str) -> None:
-     """Preferred: project's util; fallback to ffmpeg."""
+     """Preferred: project’s util; fallback to ffmpeg."""
    sys.path.append(str(REPO_DIR))
    try:
        from hunyuanvideo_foley.utils.media_utils import merge_audio_video
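The docstring above describes using the project's `merge_audio_video` util when available and falling back to ffmpeg otherwise. As a rough, hedged illustration of that fallback path (not the repository's actual implementation; the helper name below is hypothetical), the merge could shell out to the standard ffmpeg CLI:

```python
# Hypothetical sketch of the ffmpeg fallback mentioned in the docstring above;
# the real app.py may use different flags or error handling.
import subprocess

def _merge_with_ffmpeg(audio_path: str, video_path: str, out_path: str) -> None:
    # Copy the video stream as-is, encode audio to AAC, stop at the shorter stream.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-shortest",
        out_path,
    ]
    subprocess.run(cmd, check=True)
```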
@@ -393,112 +397,109 @@ def create_ui() -> gr.Blocks:
                label="Saved Results"
            )
            refresh = gr.Button("🔄 Refresh Gallery")
-             refresh.click(lambda: gr.update(value=_list_gallery()), outputs=[gallery])
+
+             def _refresh_gallery():
+                 return gr.update(value=_list_gallery())
+
+             # Refresh via button
+             refresh.click(_refresh_gallery, outputs=[gallery])
+
+             # Also refresh after generation finishes
+             generate.then(_refresh_gallery, inputs=None, outputs=[gallery])

        with gr.Tab("API & MCP"):
-             gr.Markdown("""
-             ### REST examples
-
-             **POST** `/api_generate_from_url`
-             ```json
-             {
-               "video_url_or_b64": "https://yourhost/sample.mp4",
-               "text_prompt": "metallic clink; hollow room reverb",
-               "guidance_scale": 4.5,
-               "num_inference_steps": 50,
-               "sample_nums": 2
-             }
-             ```
-
-             **POST** `/load_model_tool`
-             Loads the model proactively (useful before batch runs).
-
-             ### MCP resources & prompt
-             - `shortifoley://status` → quick health info
-             - `foley_prompt` → reusable guidance for describing the sound
-
-             Works great with n8n: call `load_model_tool` once, then `api_generate_from_url` for each clip.
-             """)
+             gr.Markdown(
+                 "### REST examples\n\n"
+                 "**POST** `api_generate_from_url`\n"
+                 "```json\n"
+                 "{\n"
+                 '  "video_url_or_b64": "https://yourhost/sample.mp4",\n'
+                 '  "text_prompt": "metallic clink; hollow room reverb",\n'
+                 '  "guidance_scale": 4.5,\n'
+                 '  "num_inference_steps": 50,\n'
+                 '  "sample_nums": 2\n'
+                 "}\n"
+                 "```\n\n"
+                 "**POST** `load_model_tool` — loads the model proactively.\n\n"
+                 "### MCP resources & prompt\n"
+                 "- `shortifoley://status` → quick health info\n"
+                 "- `foley_prompt` → reusable guidance for describing the sound\n\n"
+                 "Works with n8n: call `load_model_tool` once, then `api_generate_from_url` per clip."
+             )

        with gr.Tab("ℹ️ About"):
            gr.HTML(_about_html())

-         # Keep gallery fresh after generation
-         generate.click(lambda: gr.update(value=_list_gallery()), outputs=[gallery])
-
-         # ---- REST + MCP endpoints (inside Blocks) ----
-         def _download_to_tmp(url: str) -> str:
-             try:
-                 import requests
-             except Exception:
-                 raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
-             r = requests.get(url, timeout=30)
-             r.raise_for_status()
-             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-             tmp.write(r.content)
-             tmp.flush()
-             tmp.close()
-             return tmp.name
-
-         def _maybe_from_base64(data_url_or_b64: str) -> str:
-             b64 = data_url_or_b64
-             if data_url_or_b64.startswith("data:"):
-                 b64 = data_url_or_b64.split(",", 1)[-1]
-             raw = base64.b64decode(b64)
-             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-             tmp.write(raw)
-             tmp.flush()
-             tmp.close()
-             return tmp.name
-
-         def _normalize_video_input(video_url_or_b64: str) -> str:
-             v = (video_url_or_b64 or "").strip()
-             if v.startswith("http://") or v.startswith("https://"):
-                 return _download_to_tmp(v)
-             return _maybe_from_base64(v)
-
-         @gr.api
-         def api_generate_from_url(
-             video_url_or_b64: str,
-             text_prompt: str = "",
-             guidance_scale: float = 4.5,
-             num_inference_steps: int = 50,
-             sample_nums: int = 1,
-         ) -> Dict[str, List[str]]:
-             if _model_dict is None or _cfg is None:
-                 msg = auto_load_models()
-                 if not str(msg).startswith("✅"):
-                     raise RuntimeError(msg)
-             local = _normalize_video_input(video_url_or_b64)
-             outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
-             return {"videos": outs, "message": msg}
-
-         @gr.api
-         def load_model_tool() -> str:
-             """Ensure model is loaded on server (convenient for MCP/REST)."""
-             return auto_load_models()
-
-         @gr.mcp.resource("shortifoley://status")
-         def shortifoley_status() -> str:
-             """Return a simple readiness string for MCP clients."""
-             ready = _model_dict is not None and _cfg is not None
-             dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
-             return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
-
-         @gr.mcp.prompt()
-         def foley_prompt(name: str = "default") -> str:
-             """Reusable guidance for describing sound ambience."""
-             return (
-                 "Describe the expected environmental sound precisely. Mention material, rhythm, intensity, and ambience.\n"
-                 "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
-             )
-
-         # Auto-load model when UI first renders
-         demo.load(
-             fn=auto_load_models,
-             inputs=None,
-             outputs=[status]
-         )
+         # ---- REST + MCP endpoints (inside Blocks) ----
+         def _download_to_tmp(url: str) -> str:
+             try:
+                 import requests  # ensure added in requirements if you use this endpoint
+             except Exception:
+                 raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
+             r = requests.get(url, timeout=30)
+             r.raise_for_status()
+             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+             tmp.write(r.content)
+             tmp.flush()
+             tmp.close()
+             return tmp.name
+
+         def _maybe_from_base64(data_url_or_b64: str) -> str:
+             b64 = data_url_or_b64
+             if data_url_or_b64.startswith("data:"):
+                 b64 = data_url_or_b64.split(",", 1)[-1]
+             raw = base64.b64decode(b64)
+             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+             tmp.write(raw)
+             tmp.flush()
+             tmp.close()
+             return tmp.name
+
+         def _normalize_video_input(video_url_or_b64: str) -> str:
+             v = (video_url_or_b64 or "").strip()
+             if v.startswith("http://") or v.startswith("https://"):
+                 return _download_to_tmp(v)
+             return _maybe_from_base64(v)
+
+         @gr.api
+         def api_generate_from_url(
+             video_url_or_b64: str,
+             text_prompt: str = "",
+             guidance_scale: float = 4.5,
+             num_inference_steps: int = 50,
+             sample_nums: int = 1,
+         ) -> Dict[str, List[str]]:
+             if _model_dict is None or _cfg is None:
+                 msg = auto_load_models()
+                 if not str(msg).startswith("✅"):
+                     raise RuntimeError(msg)
+             local = _normalize_video_input(video_url_or_b64)
+             outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
+             return {"videos": outs, "message": msg}
+
+         @gr.api
+         def load_model_tool() -> str:
+             """Ensure model is loaded on server (convenient for MCP/REST)."""
+             return auto_load_models()
+
+         @gr.mcp.resource("shortifoley://status")
+         def shortifoley_status() -> str:
+             """Return a simple readiness string for MCP clients."""
+             ready = _model_dict is not None and _cfg is not None
+             dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
+             return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
+
+         @gr.mcp.prompt()
+         def foley_prompt(name: str = "default") -> str:
+             """Reusable guidance for describing sound ambience."""
+             return (
+                 "Describe the expected environmental sound precisely. Mention material, rhythm, intensity, and ambience.\n"
+                 "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
+             )
+
+         # Auto-load model when UI first renders and populate gallery initially
+         demo.load(fn=auto_load_models, inputs=None, outputs=[status])
+         demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])

    return demo

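For context on how the `@gr.api` endpoints in the hunk above are typically consumed, here is a hedged client-side sketch using the `gradio_client` package. The Space URL is a placeholder, and the `api_name` values are assumptions based on the function names; both the URL input and the base64 data-URL input shown below correspond to the branches of `_normalize_video_input`:

```python
# Hypothetical client-side sketch; the URL and api_name values are assumptions.
import base64
from gradio_client import Client

client = Client("https://your-space.hf.space")  # placeholder Space URL

# Warm the model once (mirrors the "call load_model_tool first" advice in the UI text).
print(client.predict(api_name="/load_model_tool"))

# Option A: pass the video by URL.
result = client.predict(
    "https://yourhost/sample.mp4",          # video_url_or_b64
    "metallic clink; hollow room reverb",   # text_prompt
    4.5,                                    # guidance_scale
    50,                                     # num_inference_steps
    2,                                      # sample_nums
    api_name="/api_generate_from_url",
)
print(result)

# Option B: pass a local file as a base64 data URL (handled by _maybe_from_base64).
with open("clip.mp4", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()
result = client.predict(
    "data:video/mp4;base64," + b64,
    "soft rain on a tin roof",
    4.5, 50, 1,
    api_name="/api_generate_from_url",
)
```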
@@ -538,4 +539,3 @@ if __name__ == "__main__":
        show_error=True,
        mcp_server=True,   # MCP on (great for n8n)
    )
-
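Because the app launches with `mcp_server=True`, MCP-capable clients (including n8n's MCP nodes) can connect to the Space's MCP endpoint in addition to the REST routes. The exact path depends on the Gradio version; recent releases document an SSE endpoint under `/gradio_api/mcp/sse`, so a client configuration might look roughly like the sketch below (server name and URL are placeholders, not taken from this commit):

```json
{
  "mcpServers": {
    "shortifoley": {
      "url": "https://your-space.hf.space/gradio_api/mcp/sse"
    }
  }
}
```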
 