Update app.py
app.py (CHANGED)
@@ -7,9 +7,6 @@ os.environ.setdefault("HF_PREFER_SAFETENSORS", "1")
 
 import sys
 import json
-import uuid
-import time
-import shutil
 import base64
 import random
 import tempfile
@@ -43,30 +40,15 @@ WATERMARK_NOTE = "Made with ❤️ by bilsimaging.com"
 # ZeroGPU limit (<=120)
 GPU_DURATION = int(os.environ.get("GPU_DURATION_SECS", "110"))
 
-# Globals
+# Globals (NO CUDA INIT HERE)
 _model_dict = None
 _cfg = None
 _device: Optional[torch.device] = None
 
 
 # ------------
-# Small helpers
+# Small helpers (CPU-only; avoid touching CUDA here)
 # ------------
-def _setup_device(pref: str = "auto", gpu_id: int = 0) -> torch.device:
-    """Pick CUDA if available, else MPS, else CPU."""
-    if pref == "auto":
-        if torch.cuda.is_available():
-            d = torch.device(f"cuda:{gpu_id}")
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            d = torch.device("mps")
-        else:
-            d = torch.device("cpu")
-    else:
-        d = torch.device(pref)
-    logger.info(f"Using {d}")
-    return d
-
-
 def _ensure_repo() -> None:
     """Shallow-clone Tencent repo with LFS smudge disabled (avoid LFS quota checkout)."""
     if REPO_DIR.exists():
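The deleted _setup_device helper probed torch.cuda.is_available() in the main process; on ZeroGPU that can initialize CUDA outside a GPU task, which is exactly what this commit removes. A minimal sketch of the pattern the diff moves to, assuming the spaces package Hugging Face provides on ZeroGPU hardware (names here are illustrative, not app.py's):

    import torch
    import spaces  # ZeroGPU helper package available on Hugging Face Spaces

    _device = None  # module scope stays CPU-only: no torch.cuda.* probing here

    @spaces.GPU(duration=110)  # a GPU is attached only while this function runs
    def gpu_task() -> str:
        global _device
        _device = torch.device("cuda:0")  # safe: we are inside a GPU task
        return f"running on {_device}"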
@@ -105,26 +87,30 @@ def prepare_once() -> None:
 # -----------------------
 # Model load & inference
 # -----------------------
-def auto_load_models() -> str:
+def auto_load_models(device: Optional[torch.device] = None) -> str:
     """
-    Load HunyuanVideo-Foley + encoders on the
-
+    Load HunyuanVideo-Foley + encoders on the given device.
+    MUST be called only inside a @spaces.GPU context with device=cuda:0.
     """
     global _model_dict, _cfg, _device
 
     if _model_dict is not None and _cfg is not None:
         return "✅ Model already loaded."
 
-    #
-
+    # DO NOT probe CUDA here unless device is passed from GPU context
+    if device is None:
+        return "❌ Load the model inside a GPU task first (use the Load button or run Generate)."
+
+    os.environ["HF_PREFER_SAFETENSORS"] = "1"  # enforce again for safety
 
     sys.path.append(str(REPO_DIR))
     from hunyuanvideo_foley.utils.model_utils import load_model
 
-    _device =
+    _device = device
     logger.info("Loading HunyuanVideo-Foley model...")
     logger.info(f"MODEL_PATH: {WEIGHTS_DIR}")
     logger.info(f"CONFIG_PATH: {CONFIG_PATH}")
+    logger.info(f"TARGET_DEVICE: {_device}")
 
     try:
         _model_dict, _cfg = load_model(str(WEIGHTS_DIR), str(CONFIG_PATH), _device)
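With the guard in place, auto_load_models has a simple contract: callers outside a GPU task get a refusal string instead of a CUDA crash. A hedged sketch of both branches, assuming app.py's auto_load_models is in scope:

    import torch

    # Without a device (e.g., called from the main process) the loader refuses:
    msg = auto_load_models()
    assert msg.startswith("❌")

    # Inside a @spaces.GPU task, pass the attached GPU explicitly:
    msg = auto_load_models(torch.device("cuda:0"))
    if not msg.startswith("✅"):
        raise RuntimeError(msg)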
@@ -222,9 +208,12 @@ def infer_single_video(
     Generate Foley audio for an uploaded video (1–6 variants).
     Returns: (list of output video paths, status message)
     """
-    #
+    # Safe: inside GPU context, we can use CUDA
+    device = torch.device("cuda:0")
+
+    # Lazy-load if needed on GPU
     if _model_dict is None or _cfg is None:
-        msg = auto_load_models()
+        msg = auto_load_models(device)
         if not str(msg).startswith("✅"):
             return [], f"❌ {msg}"
 
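For context, infer_single_video runs under @spaces.GPU (the decorator sits above this hunk), which is why pinning cuda:0 here is safe. A hedged sketch of that overall shape, with illustrative names:

    import spaces
    import torch

    @spaces.GPU(duration=110)
    def infer(video_path: str, prompt: str) -> str:
        device = torch.device("cuda:0")  # safe inside the GPU task
        msg = auto_load_models(device)   # lazy first-call load (app.py function)
        if not msg.startswith("✅"):
            return f"❌ {msg}"
        return "✅ model ready; denoising would run here"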
@@ -261,8 +250,15 @@ def infer_single_video(
     return outs, f"✅ Generated {len(outs)} result(s). Saved to {OUTPUTS_DIR}/"
 
 
+# Separate GPU task to preload model (used by the Load button & API)
+@spaces.GPU(duration=GPU_DURATION)
+def gpu_load_models() -> str:
+    device = torch.device("cuda:0")
+    return auto_load_models(device)
+
+
 # -------------
-# Gradio UI (with MCP+
+# Gradio UI (with MCP + REST endpoints)
 # -------------
 def _about_html() -> str:
     return f"""
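The dedicated preload task lets the Space pay the weight-loading cost before the first generation request. A short usage sketch, relying on the short-circuit in auto_load_models shown earlier:

    status = gpu_load_models()  # first call: loads weights on the attached GPU
    print(status)
    print(gpu_load_models())    # later calls return "✅ Model already loaded."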
@@ -292,8 +288,7 @@ def _about_html() -> str:
     <p>This Space exposes an <b>MCP server</b> and simple REST endpoints (see “API & MCP” tab).
     Perfect for pipelines and tools like <b>n8n</b>.</p>
 
-
-    <p>Each output writes a JSON sidecar including: <i>{WATERMARK_NOTE}</i>. Ask if you want a visible overlay.</p>
+
     </div>
     """
 
@@ -307,6 +302,7 @@ def create_ui() -> gr.Blocks:
     .generate-btn button{ font-weight:800; border-radius:12px; padding:10px 18px;}
     .minor-btn button{ border-radius:10px;}
     .muted{ color:#64748b; }
+    .footer-text{ margin-top:16px; text-align:center; color:#475569; font-size:.95rem;}
     """
     with gr.Blocks(title="ShortiFoley — HunyuanVideo-Foley", css=css) as demo:
 
@@ -367,12 +363,13 @@ def create_ui() -> gr.Blocks:
             api_description="Generate Foley audio for an uploaded video. Returns up to 6 video+audio files."
         )
 
+        # Load model (GPU-safe)
         load_btn.click(
-            fn=auto_load_models,
+            fn=gpu_load_models,
             inputs=[],
             outputs=[status],
             api_name="/load_model",
-            api_description="Load/initialize the ShortiFoley model and encoders."
+            api_description="Load/initialize the ShortiFoley model and encoders (runs on GPU)."
        )
 
        # Toggle visibility based on variants
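Since the click handler registers api_name="/load_model", the same action is callable remotely. A hedged sketch with gradio_client; the Space id is a placeholder:

    from gradio_client import Client

    client = Client("bilsimaging/ShortiFoley")     # placeholder Space id
    print(client.predict(api_name="/load_model"))  # status string from gpu_load_models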
@@ -403,8 +400,7 @@ def create_ui() -> gr.Blocks:
 
         # Refresh via button
         refresh.click(_refresh_gallery, outputs=[gallery])
-
-        # Also refresh after generation finishes (chain on the event, NOT the button)
+        # Also refresh after generation finishes
         gen_evt.then(_refresh_gallery, inputs=None, outputs=[gallery])
 
         with gr.Tab("API & MCP"):
@@ -430,6 +426,13 @@ def create_ui() -> gr.Blocks:
         with gr.Tab("ℹ️ About"):
             gr.HTML(_about_html())
 
+        # Footer
+        gr.HTML("""
+        <div class="footer-text">
+            <p>🚀 Created by <b>bilsimaging.com</b> • Powered by HunyuanVideo-Foley • Generate high-quality audio from video and text descriptions</p>
+        </div>
+        """)
+
         # ---- REST + MCP endpoints (inside Blocks) ----
         def _download_to_tmp(url: str) -> str:
             try:
@@ -469,10 +472,9 @@ def create_ui() -> gr.Blocks:
             num_inference_steps: int = 50,
             sample_nums: int = 1,
         ) -> Dict[str, List[str]]:
+            # Ensure model is ready (GPU-safe path)
             if _model_dict is None or _cfg is None:
-                msg = auto_load_models()
-                if not str(msg).startswith("✅"):
-                    raise RuntimeError(msg)
+                _ = gpu_load_models()
             local = _normalize_video_input(video_url_or_b64)
             outs, msg = infer_single_video(local, text_prompt, guidance_scale, num_inference_steps, sample_nums)
             return {"videos": outs, "message": msg}
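This endpoint is also reachable over REST/MCP via gradio_client. A hedged client sketch; the Space id and endpoint name are placeholders, since the function's def line sits above this hunk:

    from gradio_client import Client

    client = Client("bilsimaging/ShortiFoley")  # placeholder Space id
    result = client.predict(
        "https://example.com/clip.mp4",         # video_url_or_b64
        "rain on a tin roof, distant thunder",  # text_prompt
        4.5,                                    # guidance_scale
        50,                                     # num_inference_steps
        1,                                      # sample_nums
        api_name="/generate_foley",             # assumed endpoint name
    )
    print(result["message"], result["videos"])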
@@ -480,14 +482,14 @@ def create_ui() -> gr.Blocks:
         @gr.api
         def load_model_tool() -> str:
             """Ensure model is loaded on server (convenient for MCP/REST)."""
-            return auto_load_models()
+            return gpu_load_models()
 
         @gr.mcp.resource("shortifoley://status")
         def shortifoley_status() -> str:
             """Return a simple readiness string for MCP clients."""
             ready = _model_dict is not None and _cfg is not None
             dev = "cuda" if (_device and _device.type == "cuda") else ("mps" if (_device and _device.type == "mps") else "cpu")
-            return f"ShortiFoley status: {'ready' if ready else '
+            return f"ShortiFoley status: {'ready' if ready else 'idle'} | device={dev} | outputs={OUTPUTS_DIR}"
 
         @gr.mcp.prompt()
         def foley_prompt(name: str = "default") -> str:
@@ -497,9 +499,8 @@ def create_ui() -> gr.Blocks:
                 "Example: 'Soft leather footfalls on wet pavement with distant traffic hiss; occasional splashes.'"
             )
 
-        #
-        demo.load(
-        demo.load(lambda: gr.update(value=_list_gallery()), inputs=None, outputs=[gallery])
+        # IMPORTANT: Do NOT auto-load models here to avoid CUDA init in main process
+        demo.load(lambda: "Ready. Click 'Load model' or 'Generate' to start.", inputs=None, outputs=None)
 
     return demo
 
@@ -511,7 +512,7 @@ def set_seeds(s: int = 1):
 
 
 # -------------
-# App bootstrap
+# App bootstrap (CPU only)
 # -------------
 if __name__ == "__main__":
     logger.remove()
@@ -521,7 +522,7 @@ if __name__ == "__main__":
     logger.info("===== Application Startup =====\n")
     prepare_once()
 
-    # Probe imports (early surfacing)
+    # Probe imports (early surfacing) — CPU-safe
    sys.path.append(str(REPO_DIR))
    try:
        from hunyuanvideo_foley.utils.model_utils import load_model, denoise_process  # noqa: F401
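Tying the bootstrap together: a hedged sketch of a CPU-only __main__ consistent with this hunk. prepare_once and create_ui are app.py's functions; mcp_server=True is an assumption based on the "API & MCP" tab (supported in recent Gradio releases):

    # No model or CUDA work happens here; the main process stays CPU-only.
    if __name__ == "__main__":
        prepare_once()        # clone repo + fetch weights (CPU)
        demo = create_ui()
        demo.launch(mcp_server=True)  # assumption: MCP enabled at launch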
|