Bils committed on
Commit 03653a8 · verified · 1 Parent(s): 7797ff1

Update app.py

Files changed (1)
app.py +102 -102
app.py CHANGED
@@ -7,6 +7,9 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")

import sys
import json
+ import uuid
+ import time
+ import shutil
import base64
import random
import tempfile
@@ -22,6 +25,7 @@ from loguru import logger
from huggingface_hub import snapshot_download
import spaces  # HF Spaces ZeroGPU & MCP integration

+
# -------------------------
# Constants & configuration
# -------------------------
@@ -141,7 +145,7 @@ def auto_load_models() -> str:


def _merge_audio_video(audio_path: str, video_path: str, out_path: str) -> None:
-     """Preferred: project's util; fallback to ffmpeg."""
+     """Preferred: project’s util; fallback to ffmpeg."""
    sys.path.append(str(REPO_DIR))
    try:
        from hunyuanvideo_foley.utils.media_utils import merge_audio_video
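The docstring above describes using the project's `merge_audio_video` util when available and falling back to ffmpeg otherwise. As a rough, hedged illustration of that fallback path (not the repository's actual implementation; the helper name below is hypothetical), the merge could shell out to the standard ffmpeg CLI:

```python
# Hypothetical sketch of the ffmpeg fallback mentioned in the docstring above;
# the real app.py may use different flags or error handling.
import subprocess

def _merge_with_ffmpeg(audio_path: str, video_path: str, out_path: str) -> None:
    # Copy the video stream as-is, encode audio to AAC, stop at the shorter stream.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-shortest",
        out_path,
    ]
    subprocess.run(cmd, check=True)
```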
@@ -393,112 +397,109 @@ def create_ui() -> gr.Blocks:
                label="Saved Results"
            )
            refresh = gr.Button("🔄 Refresh Gallery")
-             refresh.click(lambda: gr.update(value=_list_gallery()), outputs=[gallery])
+
+             def _refresh_gallery():
+                 return gr.update(value=_list_gallery())
+
+             # Refresh via button
+             refresh.click(_refresh_gallery, outputs=[gallery])
+
+             # Also refresh after generation finishes
+             generate.then(_refresh_gallery, inputs=None, outputs=[gallery])

        with gr.Tab("API & MCP"):
-             gr.Markdown("""
-             ### REST examples
-
-             **POST** `/api_generate_from_url`
-             ```json
-             {
-               "video_url_or_b64": "https://yourhost/sample.mp4",
-               "text_prompt": "metallic clink; hollow room reverb",
-               "guidance_scale": 4.5,
-               "num_inference_steps": 50,
-               "sample_nums": 2
-             }
-             ```
-
-             **POST** `/load_model_tool`
-             Loads the model proactively (useful before batch runs).
-
-             ### MCP resources & prompt
-             - `shortifoley://status` → quick health info
-             - `foley_prompt` → reusable guidance for describing the sound
-
-             Works great with n8n: call `load_model_tool` once, then `api_generate_from_url` for each clip.
-             """)
+             gr.Markdown(
+                 "### REST examples\n\n"
+                 "**POST** `api_generate_from_url`\n"
+                 "```json\n"
+                 "{\n"
+                 '  "video_url_or_b64": "https://yourhost/sample.mp4",\n'
+                 '  "text_prompt": "metallic clink; hollow room reverb",\n'
+                 '  "guidance_scale": 4.5,\n'
+                 '  "num_inference_steps": 50,\n'
+                 '  "sample_nums": 2\n'
+                 "}\n"
+                 "```\n\n"
+                 "**POST** `load_model_tool` — loads the model proactively.\n\n"
+                 "### MCP resources & prompt\n"
+                 "- `shortifoley://status` → quick health info\n"
+                 "- `foley_prompt` → reusable guidance for describing the sound\n\n"
+                 "Works with n8n: call `load_model_tool` once, then `api_generate_from_url` per clip."
+             )

        with gr.Tab("ℹ️ About"):
            gr.HTML(_about_html())

-         # Keep gallery fresh after generation
-         generate.click(lambda: gr.update(value=_list_gallery()), outputs=[gallery])
-
-         # ---- REST + MCP endpoints (inside Blocks) ----
-         def _download_to_tmp(url: str) -> str:
-             try:
-                 import requests
-             except Exception:
-                 raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
-             r = requests.get(url, timeout=30)
-             r.raise_for_status()
-             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-             tmp.write(r.content)
-             tmp.flush()
-             tmp.close()
-             return tmp.name
-
-         def _maybe_from_base64(data_url_or_b64: str) -> str:
-             b64 = data_url_or_b64
-             if data_url_or_b64.startswith("data:"):
-                 b64 = data_url_or_b64.split(",", 1)[-1]
-             raw = base64.b64decode(b64)
-             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-             tmp.write(raw)
-             tmp.flush()
-             tmp.close()
-             return tmp.name
-
-         def _normalize_video_input(video_url_or_b64: str) -> str:
-             v = (video_url_or_b64 or "").strip()
-             if v.startswith("http://") or v.startswith("https://"):
-                 return _download_to_tmp(v)
-             return _maybe_from_base64(v)
-
-         @gr.api
-         def api_generate_from_url(
-             video_url_or_b64: str,
-             text_prompt: str = "",
-             guidance_scale: float = 4.5,
-             num_inference_steps: int = 50,
-             sample_nums: int = 1,
-         ) -> Dict[str, List[str]]:
-             if _model_dict is None or _cfg is None:
-                 msg = auto_load_models()
-                 if not str(msg).startswith("✅"):
-                     raise RuntimeError(msg)
-             local = _normalize_video_input(video_url_or_b64)
-             outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
-             return {"videos": outs, "message": msg}
-
-         @gr.api
-         def load_model_tool() -> str:
-             """Ensure model is loaded on server (convenient for MCP/REST)."""
-             return auto_load_models()
-
-         @gr.mcp.resource("shortifoley://status")
-         def shortifoley_status() -> str:
-             """Return a simple readiness string for MCP clients."""
-             ready = _model_dict is not None and _cfg is not None
-             dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
-             return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
-
-         @gr.mcp.prompt()
-         def foley_prompt(name: str = "default") -> str:
-             """Reusable guidance for describing sound ambience."""
-             return (
-                 "Describe the expected environmental sound precisely. Mention material, rhythm, intensity, and ambience.\n"
-                 "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
-             )
-
-         # Auto-load model when UI first renders
-         demo.load(
-             fn=auto_load_models,
-             inputs=None,
-             outputs=[status]
-         )
+         # ---- REST + MCP endpoints (inside Blocks) ----
+         def _download_to_tmp(url: str) -> str:
+             try:
+                 import requests  # ensure added in requirements if you use this endpoint
+             except Exception:
+                 raise RuntimeError("Missing dependency 'requests'. Add it to requirements.txt to use URL inputs.")
+             r = requests.get(url, timeout=30)
+             r.raise_for_status()
+             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+             tmp.write(r.content)
+             tmp.flush()
+             tmp.close()
+             return tmp.name
+
+         def _maybe_from_base64(data_url_or_b64: str) -> str:
+             b64 = data_url_or_b64
+             if data_url_or_b64.startswith("data:"):
+                 b64 = data_url_or_b64.split(",", 1)[-1]
+             raw = base64.b64decode(b64)
+             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+             tmp.write(raw)
+             tmp.flush()
+             tmp.close()
+             return tmp.name
+
+         def _normalize_video_input(video_url_or_b64: str) -> str:
+             v = (video_url_or_b64 or "").strip()
+             if v.startswith("http://") or v.startswith("https://"):
+                 return _download_to_tmp(v)
+             return _maybe_from_base64(v)
+
+         @gr.api
+         def api_generate_from_url(
+             video_url_or_b64: str,
+             text_prompt: str = "",
+             guidance_scale: float = 4.5,
+             num_inference_steps: int = 50,
+             sample_nums: int = 1,
+         ) -> Dict[str, List[str]]:
+             if _model_dict is None or _cfg is None:
+                 msg = auto_load_models()
+                 if not str(msg).startswith("✅"):
+                     raise RuntimeError(msg)
+             local = _normalize_video_input(video_url_or_b64)
+             outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
+             return {"videos": outs, "message": msg}
+
+         @gr.api
+         def load_model_tool() -> str:
+             """Ensure model is loaded on server (convenient for MCP/REST)."""
+             return auto_load_models()
+
+         @gr.mcp.resource("shortifoley://status")
+         def shortifoley_status() -> str:
+             """Return a simple readiness string for MCP clients."""
+             ready = _model_dict is not None and _cfg is not None
+             dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
+             return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
+
+         @gr.mcp.prompt()
+         def foley_prompt(name: str = "default") -> str:
+             """Reusable guidance for describing sound ambience."""
+             return (
+                 "Describe the expected environmental sound precisely. Mention material, rhythm, intensity, and ambience.\n"
+                 "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
+             )
+
+         # Auto-load model when UI first renders and populate gallery initially
+         demo.load(fn=auto_load_models, inputs=None, outputs=[status])
+         demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])

    return demo

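For context on how the `@gr.api` endpoints in the hunk above are typically consumed, here is a hedged client-side sketch using the `gradio_client` package. The Space URL is a placeholder, and the `api_name` values are assumptions based on the function names; both the URL input and the base64 data-URL input shown below correspond to the branches of `_normalize_video_input`:

```python
# Hypothetical client-side sketch; the URL and api_name values are assumptions.
import base64
from gradio_client import Client

client = Client("https://your-space.hf.space")  # placeholder Space URL

# Warm the model once (mirrors the "call load_model_tool first" advice in the UI text).
print(client.predict(api_name="/load_model_tool"))

# Option A: pass the video by URL.
result = client.predict(
    "https://yourhost/sample.mp4",          # video_url_or_b64
    "metallic clink; hollow room reverb",   # text_prompt
    4.5,                                    # guidance_scale
    50,                                     # num_inference_steps
    2,                                      # sample_nums
    api_name="/api_generate_from_url",
)
print(result)

# Option B: pass a local file as a base64 data URL (handled by _maybe_from_base64).
with open("clip.mp4", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()
result = client.predict(
    "data:video/mp4;base64," + b64,
    "soft rain on a tin roof",
    4.5, 50, 1,
    api_name="/api_generate_from_url",
)
```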
@@ -538,4 +539,3 @@ if __name__ == "__main__":
        show_error=True,
        mcp_server=True,   # MCP on (great for n8n)
    )
-
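Because the app launches with `mcp_server=True`, MCP-capable clients (including n8n's MCP nodes) can connect to the Space's MCP endpoint in addition to the REST routes. The exact path depends on the Gradio version; recent releases document an SSE endpoint under `/gradio_api/mcp/sse`, so a client configuration might look roughly like the sketch below (server name and URL are placeholders, not taken from this commit):

```json
{
  "mcpServers": {
    "shortifoley": {
      "url": "https://your-space.hf.space/gradio_api/mcp/sse"
    }
  }
}
```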
 