Bils committed
Commit a1410e8 · verified · 1 Parent(s): c081cea

Update app.py

Files changed (1)
  1. app.py +46 -45
app.py CHANGED
@@ -7,9 +7,6 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
 
 import sys
 import json
-import uuid
-import time
-import shutil
 import base64
 import random
 import tempfile
@@ -43,30 +40,15 @@ WATERMARK_NOTE = "Made with ❤️ by bilsimaging.com"
 # ZeroGPU limit (<=120)
 GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
 
-# Globals
+# Globals (NO CUDA INIT HERE)
 _model_dict = None
 _cfg = None
 _device: Optional[torch.device] = None
 
 
 # ------------
-# Small helpers
+# Small helpers (CPU-only; avoid touching CUDA here)
 # ------------
-def _setup_device(pref: str = "auto", gpu_id: int = 0) -> torch.device:
-    """Pick CUDA if available, else MPS, else CPU."""
-    if pref == "auto":
-        if torch.cuda.is_available():
-            d = torch.device(f"cuda:{gpu_id}")
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            d = torch.device("mps")
-        else:
-            d = torch.device("cpu")
-    else:
-        d = torch.device(pref)
-    logger.info(f"Using {d}")
-    return d
-
-
 def _ensure_repo() -> None:
     """Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
     if REPO_DIR.exists():
@@ -105,26 +87,30 @@ def prepare_once() -> None:
 # -----------------------
 # Model load & inference
 # -----------------------
-def auto_load_models() -> str:
+def auto_load_models(device: Optional[torch.device] = None) -> str:
     """
-    Load HunyuanVideo-Foley + encoders on the chosen device.
-    Ensures safetensors is preferred to avoid ZeroGPU issues with .bin checkpoints.
+    Load HunyuanVideo-Foley + encoders on the given device.
+    MUST be called only inside a @spaces.GPU context with device=cuda:0.
     """
     global _model_dict, _cfg, _device
 
     if _model_dict is not None and _cfg is not None:
         return "✅ Model already loaded."
 
-    # Make absolutely sure safetensors is preferred
-    os.environ["HF_PREFER_SAFETENSORS"] = "1"
+    # DO NOT probe CUDA here unless device is passed from GPU context
+    if device is None:
+        return "❌ Load the model inside a GPU task first (use the Load button or run Generate)."
+
+    os.environ["HF_PREFER_SAFETENSORS"] = "1"  # enforce again for safety
 
     sys.path.append(str(REPO_DIR))
     from hunyuanvideo_foley.utils.model_utils import load_model
 
-    _device = _setup_device("auto", 0)
+    _device = device
     logger.info("Loading HunyuanVideo-Foley model...")
     logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
     logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
+    logger.info(f"TARGET_DEVICE: {_device}")
 
     try:
         _model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
@@ -222,9 +208,12 @@ def infer_single_video(
     Generate Foley audio for an uploaded video (1–6 variants).
     Returns: (list of output video paths, status message)
     """
-    # Lazy-load if needed
+    # Safe: inside GPU context, we can use CUDA
+    device = torch.device("cuda:0")
+
+    # Lazy-load if needed on GPU
     if _model_dict is None or _cfg is None:
-        msg = auto_load_models()
+        msg = auto_load_models(device)
         if not str(msg).startswith("✅"):
             return [], f"❌ {msg}"
 
@@ -261,8 +250,15 @@ def infer_single_video(
     return outs, f"✅ Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
 
 
+# Separate GPU task to preload model (used by the Load button & API)
+@spaces.GPU(duration=GPU_DURATION)
+def gpu_load_models() -> str:
+    device = torch.device("cuda:0")
+    return auto_load_models(device)
+
+
 # -------------
-# Gradio UI (with MCP+API inside the same app)
+# Gradio UI (with MCP + REST endpoints)
 # -------------
 def _about_html() -> str:
     return f"""
@@ -292,8 +288,7 @@ def _about_html() -> str:
    <p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see “API & MCP” tab).
    Perfect for pipelines and tools like <b>n8n</b>.</p>
 
-    <h3>Watermark</h3>
-    <p>Each output writes a JSON sidecar including: <i>{WATERMARK_NOTE}</i>. Ask if you want a visible overlay.</p>
+
    </div>
    """
 
@@ -307,6 +302,7 @@ def create_ui() -> gr.Blocks:
    .generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
    .minor-btn button{ border-radius:10px;}
    .muted{ color:#64748b; }
+    .footer-text{ margin-top:16px; text-align:center; color:#475569; font-size:.95rem;}
    """
    with gr.Blocks(title="ShortiFoley — HunyuanVideo-Foley", css=css) as demo:
 
@@ -367,12 +363,13 @@ def create_ui() -> gr.Blocks:
            api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
        )
 
+        # Load model (GPU-safe)
        load_btn.click(
-            fn=auto_load_models,
+            fn=gpu_load_models,
            inputs=[],
            outputs=[status],
            api_name="/load_model",
-            api_description="Load/initialize the ShortiFoley model and encoders."
+            api_description="Load/initialize the ShortiFoley model and encoders (runs on GPU)."
        )
 
        # Toggle visibility based on variants
@@ -403,8 +400,7 @@ def create_ui() -> gr.Blocks:
 
        # Refresh via button
        refresh.click(_refresh_gallery, outputs=[gallery])
-
-        # Also refresh after generation finishes (chain on the event, NOT the button)
+        # Also refresh after generation finishes
        gen_evt.then(_refresh_gallery, inputs=None, outputs=[gallery])
 
        with gr.Tab("API & MCP"):
@@ -430,6 +426,13 @@ def create_ui() -> gr.Blocks:
        with gr.Tab("ℹ️ About"):
            gr.HTML(_about_html())
 
+        # Footer
+        gr.HTML("""
+        <div class="footer-text">
+          <p>🚀 Created by <b>bilsimaging.com</b> &bull; Powered by HunyuanVideo-Foley &bull; Generate high-quality audio from video and text descriptions</p>
+        </div>
+        """)
+
        # ---- REST + MCP endpoints (inside Blocks) ----
        def _download_to_tmp(url: str) -> str:
            try:
@@ -469,10 +472,9 @@ def create_ui() -> gr.Blocks:
            num_inference_steps: int = 50,
            sample_nums: int = 1,
        ) -> Dict[str, List[str]]:
+            # Ensure model is ready (GPU-safe path)
            if _model_dict is None or _cfg is None:
-                msg = auto_load_models()
-                if not str(msg).startswith("✅"):
-                    raise RuntimeError(msg)
+                _ = gpu_load_models()
            local = _normalize_video_input(video_url_or_b64)
            outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
            return {"videos": outs, "message": msg}
@@ -480,14 +482,14 @@ def create_ui() -> gr.Blocks:
        @gr.api
        def load_model_tool() -> str:
            """Ensure model is loaded on server (convenient for MCP/REST)."""
-            return auto_load_models()
+            return gpu_load_models()
 
        @gr.mcp.resource("shortifoley://status")
        def shortifoley_status() -> str:
            """Return a simple readiness string for MCP clients."""
            ready = _model_dict is not None and _cfg is not None
            dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
-            return f"ShortiFoley status: {'ready' if ready else 'loading'} | device={dev} | outputs={OUTPUTS_DIR}"
+            return f"ShortiFoley status: {'ready' if ready else 'idle'} | device={dev} | outputs={OUTPUTS_DIR}"
 
        @gr.mcp.prompt()
        def foley_prompt(name: str = "default") -> str:
@@ -497,9 +499,8 @@ def create_ui() -> gr.Blocks:
                "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
            )
 
-        # Auto-load model when UI first renders and populate gallery initially
-        demo.load(fn=auto_load_models, inputs=None, outputs=[status])
-        demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])
+        # IMPORTANT: Do NOT auto-load models here to avoid CUDA init in main process
+        demo.load(lambda: "Ready. Click 'Load model' or 'Generate' to start.", inputs=None, outputs=None)
 
    return demo
 
@@ -511,7 +512,7 @@ def set_seeds(s: int = 1):
 
 
 # -------------
-# App bootstrap
+# App bootstrap (CPU only)
 # -------------
 if __name__ == "__main__":
     logger.remove()
@@ -521,7 +522,7 @@ if __name__ == "__main__":
     logger.info("===== Application Startup =====\n")
     prepare_once()
 
-    # Probe imports (early surfacing)
+    # Probe imports (early surfacing) — CPU-safe
     sys.path.append(str(REPO_DIR))
     try:
         from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process  # noqa: F401
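
For context, a minimal sketch of the ZeroGPU-safe lazy-load pattern this commit adopts: the main process stays CUDA-free, and CUDA is only touched inside a @spaces.GPU task. It assumes the `spaces` package that Hugging Face provides on ZeroGPU hardware; `build_model`, `lazy_load`, and `gpu_load` are illustrative names, not functions from app.py:

import spaces
import torch
from typing import Optional

_model = None  # global cache; populated only inside a GPU task


def build_model(device: torch.device) -> torch.nn.Module:
    # Stand-in for the real checkpoint load (e.g. load_model(...)).
    return torch.nn.Linear(8, 8).to(device)


def lazy_load(device: Optional[torch.device] = None) -> str:
    """Refuse to touch CUDA unless a device from a GPU context is passed in."""
    global _model
    if _model is not None:
        return "✅ Model already loaded."
    if device is None:
        return "❌ Call this from inside a @spaces.GPU task."
    _model = build_model(device)
    return "✅ Model loaded."


@spaces.GPU(duration=110)  # keep under the ZeroGPU cap (<=120 s)
def gpu_load() -> str:
    # CUDA is first initialized here, in the GPU worker, never at import time.
    return lazy_load(torch.device("cuda:0"))

Keeping the import path CUDA-free is the point: the Space boots on CPU, and a GPU is attached only for the duration of the decorated call.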
 
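For completeness, the /load_model endpoint registered above should be callable from Python via gradio_client; this is a hedged sketch, and the Space id below is a placeholder, not confirmed by this commit:

from gradio_client import Client

client = Client("bils/shortifoley")  # hypothetical Space id
status = client.predict(api_name="/load_model")  # no inputs; returns the status string
print(status)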