Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -7,6 +7,9 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
 
 import sys
 import json
+import uuid
+import time
+import shutil
 import base64
 import random
 import tempfile
@@ -22,6 +25,7 @@ from loguru import logger
 from huggingface_hub import snapshot_download
 import spaces # HF Spaces ZeroGPU & MCP integration
 
+
 # -------------------------
 # Constants & configuration
 # -------------------------
@@ -141,7 +145,7 @@ def auto_load_models() -> str:
 
 
 def _merge_audio_video(audio_path: str, video_path: str, out_path: str) -> None:
-    """Preferred: project
+    """Preferred: project's util; fallback to ffmpeg."""
     sys.path.append(str(REPO_DIR))
     try:
         from hunyuanvideo_foley.utils.media_utils import merge_audio_video
@@ -393,112 +397,109 @@ def create_ui() -> gr.Blocks:
                 label="Saved Results"
             )
             refresh = gr.Button("🔄 Refresh Gallery")
-
+
+            def _refresh_gallery():
+                return gr.update(value=_list_gallery())
+
+            # Refresh via button
+            refresh.click(_refresh_gallery, outputs=[gallery])
+
+            # Also refresh after generation finishes
+            generate.then(_refresh_gallery, inputs=None, outputs=[gallery])
 
         with gr.Tab("API & MCP"):
-            gr.Markdown(
-                ### REST examples
-                …
-                - `shortifoley://status` → quick health info
-                - `foley_prompt` → reusable guidance for describing the sound
-
-                Works great with n8n: call `load_model_tool` once, then `api_generate_from_url` for each clip.
-                """)
+            gr.Markdown(
+                "### REST examples\n\n"
+                "**POST** `api_generate_from_url`\n"
+                "```json\n"
+                "{\n"
+                '  "video_url_or_b64": "https://yourhost/sample.mp4",\n'
+                '  "text_prompt": "metallic clink; hollow room reverb",\n'
+                '  "guidance_scale": 4.5,\n'
+                '  "num_inference_steps": 50,\n'
+                '  "sample_nums": 2\n'
+                "}\n"
+                "```\n\n"
+                "**POST** `load_model_tool` → loads the model proactively.\n\n"
+                "### MCP resources & prompt\n"
+                "- `shortifoley://status` → quick health info\n"
+                "- `foley_prompt` → reusable guidance for describing the sound\n\n"
+                "Works with n8n: call `load_model_tool` once, then `api_generate_from_url` per clip."
+            )
 
         with gr.Tab("ℹ️ About"):
             gr.HTML(_about_html())
 
-        …
-        # Auto-load model when UI first renders
-        demo.load(
-            fn=auto_load_models,
-            inputs=None,
-            outputs=[status]
-        )
+        # ---- REST + MCP endpoints (inside Blocks) ----
+        def _download_to_tmp(url: str) -> str:
+            try:
+                import requests # ensure added in requirements if you use this endpoint
+            except Exception:
+                raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
+            r = requests.get(url, timeout=30)
+            r.raise_for_status()
+            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+            tmp.write(r.content)
+            tmp.flush()
+            tmp.close()
+            return tmp.name
+
+        def _maybe_from_base64(data_url_or_b64: str) -> str:
+            b64 = data_url_or_b64
+            if data_url_or_b64.startswith("data:"):
+                b64 = data_url_or_b64.split(",", 1)[-1]
+            raw = base64.b64decode(b64)
+            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+            tmp.write(raw)
+            tmp.flush()
+            tmp.close()
+            return tmp.name
+
+        def _normalize_video_input(video_url_or_b64: str) -> str:
+            v = (video_url_or_b64 or "").strip()
+            if v.startswith("http://") or v.startswith("https://"):
+                return _download_to_tmp(v)
+            return _maybe_from_base64(v)
+
+        @gr.api
+        def api_generate_from_url(
+            video_url_or_b64: str,
+            text_prompt: str = "",
+            guidance_scale: float = 4.5,
+            num_inference_steps: int = 50,
+            sample_nums: int = 1,
+        ) -> Dict[str, List[str]]:
+            if _model_dict is None or _cfg is None:
+                msg = auto_load_models()
+                if not str(msg).startswith("✅"):
+                    raise RuntimeError(msg)
+            local = _normalize_video_input(video_url_or_b64)
+            outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
+            return {"videos": outs, "message": msg}
+
+        @gr.api
+        def load_model_tool() -> str:
+            """Ensure model is loaded on server (convenient for MCP/REST)."""
+            return auto_load_models()
+
+        @gr.mcp.resource("shortifoley://status")
+        def shortifoley_status() -> str:
+            """Return a simple readiness string for MCP clients."""
+            ready = _model_dict is not None and _cfg is not None
+            dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
+            return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
+
+        @gr.mcp.prompt()
+        def foley_prompt(name: str = "default") -> str:
+            """Reusable guidance for describing sound ambience."""
+            return (
+                "Describe the expected environmental sound precisely. Mention material, rhythm, intensity, and ambience.\n"
+                "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
+            )
+
+        # Auto-load model when UI first renders and populate gallery initially
+        demo.load(fn=auto_load_models, inputs=None, outputs=[status])
+        demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])
 
     return demo
 
@@ -538,4 +539,3 @@ if __name__ == "__main__":
         show_error=True,
         mcp_server=True, # MCP on (great for n8n)
     )
-
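The new `_merge_audio_video` docstring mentions an ffmpeg fallback, but the fallback body is not part of this diff. For context, a mux of that kind is usually a single ffmpeg invocation along these lines; this is an illustrative sketch with placeholder paths, not the repository's code:

```python
import subprocess

# Illustrative fallback shape: copy the video stream, encode the generated
# foley track to AAC, and stop at the shorter of the two streams.
subprocess.run(
    [
        "ffmpeg", "-y",
        "-i", "video.mp4",   # input video (placeholder path)
        "-i", "foley.wav",   # generated audio (placeholder path)
        "-c:v", "copy",      # no video re-encode
        "-c:a", "aac",       # MP4-friendly audio codec
        "-shortest",
        "out.mp4",
    ],
    check=True,
)
```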
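For the new REST endpoints, a minimal client-side sketch using `gradio_client` could look like the following. The Space id and sample URL are placeholders, and the endpoint names assume `gr.api` exposes each function under its own name, as the help text in the diff implies; per the n8n note, `load_model_tool` is called once before generating:

```python
from gradio_client import Client

# Placeholder Space id; replace with the actual HF Space.
client = Client("your-username/ShortiFoley")

# Warm the model once before generating clips.
print(client.predict(api_name="/load_model_tool"))

# Generate foley for one clip; arguments follow api_generate_from_url's signature.
result = client.predict(
    "https://yourhost/sample.mp4",         # video_url_or_b64
    "metallic clink; hollow room reverb",  # text_prompt
    4.5,                                   # guidance_scale
    50,                                    # num_inference_steps
    1,                                     # sample_nums
    api_name="/api_generate_from_url",
)
print(result)  # expected to carry the "videos" and "message" fields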
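Since `_normalize_video_input` accepts either an http(s) URL or base64 (raw or as a `data:` URL), a local clip can also be sent inline. A small stdlib-only sketch, with a hypothetical file name:

```python
import base64

# Encode a local clip; _maybe_from_base64 strips an optional "data:...," prefix,
# so either the raw base64 string or a full data URL is accepted.
with open("sample_local.mp4", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("ascii")

video_url_or_b64 = "data:video/mp4;base64," + b64  # pass this as video_url_or_b64
```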