Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import os, json, tempfile, subprocess, shutil,
|
2 |
from pathlib import Path
|
3 |
from typing import Optional, Tuple, List
|
4 |
|
@@ -15,12 +15,14 @@ OUT_DIR = ROOT / "outputs"
|
|
15 |
ASSETS = ROOT / "assets"
|
16 |
ASSETS.mkdir(exist_ok=True)
|
17 |
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
|
21 |
-
MAX_SECS = int(os.environ.get("MAX_SECS", "22"))
|
22 |
TARGET_H = int(os.environ.get("TARGET_H", "480")) # downscale target height
|
23 |
-
SR
|
24 |
|
25 |
def sh(cmd: str):
|
26 |
print(">>", cmd)
|
@@ -38,20 +40,19 @@ def ffprobe_duration(path: str) -> float:
|
|
38 |
|
39 |
def _clone_without_lfs():
|
40 |
"""
|
41 |
-
|
42 |
-
Falls back to sparse checkout with
|
43 |
"""
|
44 |
if REPO_DIR.exists():
|
45 |
return
|
46 |
|
47 |
-
# Attempt 1: shallow clone
|
48 |
try:
|
49 |
sh(
|
50 |
"GIT_LFS_SKIP_SMUDGE=1 "
|
51 |
"git -c filter.lfs.smudge= -c filter.lfs.required=false "
|
52 |
f"clone --depth 1 https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git {REPO_DIR}"
|
53 |
)
|
54 |
-
# Optional: remove assets folder if present
|
55 |
assets = REPO_DIR / "assets"
|
56 |
if assets.exists():
|
57 |
shutil.rmtree(assets, ignore_errors=True)
|
@@ -67,8 +68,6 @@ def _clone_without_lfs():
|
|
67 |
"remote add origin https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git"
|
68 |
)
|
69 |
sh(f"git -C {REPO_DIR} config core.sparseCheckout true")
|
70 |
-
|
71 |
-
# Choose only essential paths
|
72 |
sparse_file = REPO_DIR / ".git" / "info" / "sparse-checkout"
|
73 |
sparse_file.parent.mkdir(parents=True, exist_ok=True)
|
74 |
sparse_file.write_text("\n".join([
|
@@ -79,8 +78,7 @@ def _clone_without_lfs():
|
|
79 |
"LICENSE",
|
80 |
"README.md",
|
81 |
]) + "\n")
|
82 |
-
|
83 |
-
# Branch might be main; change to master if needed
|
84 |
try:
|
85 |
sh(f"git -C {REPO_DIR} fetch --depth 1 origin main")
|
86 |
sh(f"git -C {REPO_DIR} checkout main")
|
@@ -89,9 +87,8 @@ def _clone_without_lfs():
|
|
89 |
sh(f"git -C {REPO_DIR} checkout master")
|
90 |
|
91 |
def prepare_once():
|
92 |
-
"""Clone code (
|
93 |
_clone_without_lfs()
|
94 |
-
|
95 |
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
|
96 |
snapshot_download(
|
97 |
repo_id="tencent/HunyuanVideo-Foley",
|
@@ -100,7 +97,6 @@ def prepare_once():
|
|
100 |
repo_type="model",
|
101 |
)
|
102 |
os.environ["HIFI_FOLEY_MODEL_PATH"] = str(WEIGHTS_DIR)
|
103 |
-
|
104 |
CACHE_DIR.mkdir(exist_ok=True)
|
105 |
OUT_DIR.mkdir(exist_ok=True)
|
106 |
|
@@ -120,7 +116,6 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
|
|
120 |
temp_dir = Path(tempfile.mkdtemp(prefix="pre_"))
|
121 |
trimmed = temp_dir / "trim.mp4"
|
122 |
processed = temp_dir / "proc.mp4"
|
123 |
-
|
124 |
trim_args = ["-t", str(MAX_SECS)] if dur > MAX_SECS else []
|
125 |
|
126 |
# Normalize container & remove audio
|
@@ -133,7 +128,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
|
|
133 |
f"\"{trimmed}\""
|
134 |
]))
|
135 |
|
136 |
-
# Downscale to TARGET_H; ensure mod2 width, baseline profile
|
137 |
vf = f"scale=-2:{TARGET_H}:flags=bicubic"
|
138 |
sh(" ".join([
|
139 |
"ffmpeg", "-y", "-i", f"\"{trimmed}\"",
|
@@ -150,7 +145,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
|
|
150 |
return str(processed), final_dur
|
151 |
|
152 |
# ========= Inference (ZeroGPU) =========
|
153 |
-
@spaces.GPU(duration=240) # ~4 minutes per call (
|
154 |
def run_model(video_path: str, prompt_text: str) -> str:
|
155 |
"""
|
156 |
Call Tencent's infer.py on GPU and return a 48 kHz WAV path.
|
@@ -207,16 +202,16 @@ def single_generate(video: str, prompt: str, want_mux: bool, project_name: str)
|
|
207 |
try:
|
208 |
if not video:
|
209 |
return None, None, "⚠️ Please upload a video.", history
|
210 |
-
history.append(["Preprocess", "Downscaling
|
211 |
pre_path, final_dur = preprocess_video(video)
|
212 |
-
history.append(["Inference", "
|
213 |
wav = run_model(pre_path, prompt or "")
|
214 |
muxed = None
|
215 |
if want_mux:
|
216 |
-
history.append(["Mux", "
|
217 |
muxed = mux_audio_with_video(pre_path, wav)
|
218 |
-
history.append(["Done", f"OK ·
|
219 |
-
return wav, muxed, f"✅
|
220 |
except Exception as e:
|
221 |
history.append(["Error", str(e)])
|
222 |
return None, None, f"❌ {type(e).__name__}: {e}", history
|
@@ -234,53 +229,107 @@ def batch_lite_generate(files: List[str], prompt: str, want_mux: bool) -> Tuple[
|
|
234 |
try:
|
235 |
log.append([f"Preprocess {i}", Path(f).name])
|
236 |
pre, final_dur = preprocess_video(f)
|
237 |
-
log.append([f"Run {i}", f"
|
238 |
wav = run_model(pre, prompt or "")
|
239 |
muxed = mux_audio_with_video(pre, wav) if want_mux else None
|
240 |
outputs.append((wav, muxed))
|
241 |
log.append([f"Done {i}", "OK"])
|
242 |
except Exception as e:
|
243 |
log.append([f"Error {i}", str(e)])
|
|
|
244 |
manifest = OUT_DIR / f"batchlite_{uuid.uuid4().hex[:6]}.json"
|
245 |
manifest.write_text(json.dumps(
|
246 |
[{"wav": w, "video": v} for (w, v) in outputs], ensure_ascii=False, indent=2
|
247 |
))
|
248 |
return f"✅ Batch-lite finished · items: {len(outputs)}", log
|
249 |
|
250 |
-
# ========= UI =========
|
251 |
THEME_CSS = f"""
|
252 |
:root {{
|
253 |
--brand: {PRIMARY_COLOR};
|
|
|
|
|
|
|
|
|
|
|
254 |
}}
|
255 |
.gradio-container {{
|
256 |
-
font-family: ui-sans-serif,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
}}
|
258 |
-
#
|
259 |
-
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
}}
|
262 |
-
#brandbar strong {{ letter-spacing: .3px; }}
|
263 |
"""
|
264 |
|
265 |
-
with gr.Blocks(css=THEME_CSS, title=
|
266 |
with gr.Row():
|
267 |
-
gr.HTML(f
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
with gr.Tabs():
|
270 |
with gr.Tab("🎬 Single Clip"):
|
271 |
with gr.Group():
|
272 |
-
project_name = gr.Textbox(
|
|
|
|
|
|
|
273 |
with gr.Row():
|
274 |
v_single = gr.Video(label=f"Video (≤ ~{MAX_SECS}s recommended)")
|
275 |
-
p_single = gr.Textbox(
|
|
|
|
|
|
|
276 |
with gr.Row():
|
277 |
-
want_mux_single = gr.Checkbox(value=True, label="Mux foley
|
278 |
run_btn = gr.Button("Generate", variant="primary")
|
279 |
with gr.Row():
|
280 |
out_audio = gr.Audio(label=f"Generated Foley ({SR//1000} kHz WAV)", type="filepath")
|
281 |
out_mux = gr.Video(label="Video + Foley (MP4)", visible=True)
|
282 |
status_md = gr.Markdown()
|
283 |
-
history_table = gr.Dataframe(
|
|
|
|
|
|
|
284 |
|
285 |
run_btn.click(
|
286 |
single_generate,
|
@@ -294,7 +343,10 @@ with gr.Blocks(css=THEME_CSS, title="Foley Studio · ZeroGPU") as demo:
|
|
294 |
want_mux_b = gr.Checkbox(value=True, label="Mux each output")
|
295 |
go_b = gr.Button("Run batch-lite")
|
296 |
batch_status = gr.Markdown()
|
297 |
-
batch_log = gr.Dataframe(
|
|
|
|
|
|
|
298 |
|
299 |
go_b.click(
|
300 |
batch_lite_generate,
|
@@ -302,17 +354,16 @@ with gr.Blocks(css=THEME_CSS, title="Foley Studio · ZeroGPU") as demo:
|
|
302 |
outputs=[batch_status, batch_log]
|
303 |
)
|
304 |
|
305 |
-
with gr.Tab("
|
306 |
gr.Markdown(f"""
|
307 |
-
**
|
308 |
-
-
|
309 |
-
-
|
310 |
-
- If you
|
311 |
-
|
312 |
-
**
|
313 |
-
-
|
314 |
-
-
|
315 |
-
- `MAX_SECS`, `TARGET_H`, `TARGET_SR` → processing behavior
|
316 |
""")
|
317 |
|
318 |
demo.queue(max_size=24).launch()
|
|
|
1 |
+
import os, json, tempfile, subprocess, shutil, uuid
|
2 |
from pathlib import Path
|
3 |
from typing import Optional, Tuple, List
|
4 |
|
|
|
15 |
ASSETS = ROOT / "assets"
|
16 |
ASSETS.mkdir(exist_ok=True)
|
17 |
|
18 |
+
# You can keep these env vars silently; we just won't mention them in the UI
|
19 |
+
APP_TITLE = os.environ.get("APP_TITLE", "Foley Studio · ZeroGPU")
|
20 |
+
APP_TAGLINE = os.environ.get("APP_TAGLINE", "Generate scene-true foley for short clips (ZeroGPU-ready).")
|
21 |
+
PRIMARY_COLOR = os.environ.get("PRIMARY_COLOR", "#6B5BFF")
|
22 |
|
23 |
+
MAX_SECS = int(os.environ.get("MAX_SECS", "22")) # ZeroGPU-friendly clip length
|
24 |
TARGET_H = int(os.environ.get("TARGET_H", "480")) # downscale target height
|
25 |
+
SR = int(os.environ.get("TARGET_SR", "48000")) # WAV sample rate
|
26 |
|
27 |
def sh(cmd: str):
|
28 |
print(">>", cmd)
|
|
|
40 |
|
41 |
def _clone_without_lfs():
|
42 |
"""
|
43 |
+
Clone repo while skipping LFS smudge to avoid demo video downloads.
|
44 |
+
Falls back to sparse checkout with only essential paths.
|
45 |
"""
|
46 |
if REPO_DIR.exists():
|
47 |
return
|
48 |
|
49 |
+
# Attempt 1: shallow clone with LFS disabled
|
50 |
try:
|
51 |
sh(
|
52 |
"GIT_LFS_SKIP_SMUDGE=1 "
|
53 |
"git -c filter.lfs.smudge= -c filter.lfs.required=false "
|
54 |
f"clone --depth 1 https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git {REPO_DIR}"
|
55 |
)
|
|
|
56 |
assets = REPO_DIR / "assets"
|
57 |
if assets.exists():
|
58 |
shutil.rmtree(assets, ignore_errors=True)
|
|
|
68 |
"remote add origin https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git"
|
69 |
)
|
70 |
sh(f"git -C {REPO_DIR} config core.sparseCheckout true")
|
|
|
|
|
71 |
sparse_file = REPO_DIR / ".git" / "info" / "sparse-checkout"
|
72 |
sparse_file.parent.mkdir(parents=True, exist_ok=True)
|
73 |
sparse_file.write_text("\n".join([
|
|
|
78 |
"LICENSE",
|
79 |
"README.md",
|
80 |
]) + "\n")
|
81 |
+
# Try main, fallback to master
|
|
|
82 |
try:
|
83 |
sh(f"git -C {REPO_DIR} fetch --depth 1 origin main")
|
84 |
sh(f"git -C {REPO_DIR} checkout main")
|
|
|
87 |
sh(f"git -C {REPO_DIR} checkout master")
|
88 |
|
89 |
def prepare_once():
|
90 |
+
"""Clone code (skip LFS), download weights, set env, prepare dirs."""
|
91 |
_clone_without_lfs()
|
|
|
92 |
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
|
93 |
snapshot_download(
|
94 |
repo_id="tencent/HunyuanVideo-Foley",
|
|
|
97 |
repo_type="model",
|
98 |
)
|
99 |
os.environ["HIFI_FOLEY_MODEL_PATH"] = str(WEIGHTS_DIR)
|
|
|
100 |
CACHE_DIR.mkdir(exist_ok=True)
|
101 |
OUT_DIR.mkdir(exist_ok=True)
|
102 |
|
|
|
116 |
temp_dir = Path(tempfile.mkdtemp(prefix="pre_"))
|
117 |
trimmed = temp_dir / "trim.mp4"
|
118 |
processed = temp_dir / "proc.mp4"
|
|
|
119 |
trim_args = ["-t", str(MAX_SECS)] if dur > MAX_SECS else []
|
120 |
|
121 |
# Normalize container & remove audio
|
|
|
128 |
f"\"{trimmed}\""
|
129 |
]))
|
130 |
|
131 |
+
# Downscale to TARGET_H; ensure mod2 width, baseline profile
|
132 |
vf = f"scale=-2:{TARGET_H}:flags=bicubic"
|
133 |
sh(" ".join([
|
134 |
"ffmpeg", "-y", "-i", f"\"{trimmed}\"",
|
|
|
145 |
return str(processed), final_dur
|
146 |
|
147 |
# ========= Inference (ZeroGPU) =========
|
148 |
+
@spaces.GPU(duration=240) # ~4 minutes per call (ZeroGPU window)
|
149 |
def run_model(video_path: str, prompt_text: str) -> str:
|
150 |
"""
|
151 |
Call Tencent's infer.py on GPU and return a 48 kHz WAV path.
|
|
|
202 |
try:
|
203 |
if not video:
|
204 |
return None, None, "⚠️ Please upload a video.", history
|
205 |
+
history.append(["Preprocess", "Downscaling & trimming"])
|
206 |
pre_path, final_dur = preprocess_video(video)
|
207 |
+
history.append(["Inference", "Running on ZeroGPU"])
|
208 |
wav = run_model(pre_path, prompt or "")
|
209 |
muxed = None
|
210 |
if want_mux:
|
211 |
+
history.append(["Mux", "Merging foley with video"])
|
212 |
muxed = mux_audio_with_video(pre_path, wav)
|
213 |
+
history.append(["Done", f"OK · ~{final_dur:.1f}s"])
|
214 |
+
return wav, muxed, f"✅ Completed (~{final_dur:.1f}s)", history
|
215 |
except Exception as e:
|
216 |
history.append(["Error", str(e)])
|
217 |
return None, None, f"❌ {type(e).__name__}: {e}", history
|
|
|
229 |
try:
|
230 |
log.append([f"Preprocess {i}", Path(f).name])
|
231 |
pre, final_dur = preprocess_video(f)
|
232 |
+
log.append([f"Run {i}", f"ZeroGPU ~{final_dur:.1f}s"])
|
233 |
wav = run_model(pre, prompt or "")
|
234 |
muxed = mux_audio_with_video(pre, wav) if want_mux else None
|
235 |
outputs.append((wav, muxed))
|
236 |
log.append([f"Done {i}", "OK"])
|
237 |
except Exception as e:
|
238 |
log.append([f"Error {i}", str(e)])
|
239 |
+
|
240 |
manifest = OUT_DIR / f"batchlite_{uuid.uuid4().hex[:6]}.json"
|
241 |
manifest.write_text(json.dumps(
|
242 |
[{"wav": w, "video": v} for (w, v) in outputs], ensure_ascii=False, indent=2
|
243 |
))
|
244 |
return f"✅ Batch-lite finished · items: {len(outputs)}", log
|
245 |
|
246 |
+
# ========= UI (refreshed design) =========
|
247 |
THEME_CSS = f"""
|
248 |
:root {{
|
249 |
--brand: {PRIMARY_COLOR};
|
250 |
+
--bg: #0f1120;
|
251 |
+
--panel: #181a2e;
|
252 |
+
--text: #edf0ff;
|
253 |
+
--muted: #b7bce3;
|
254 |
+
--card: #15172a;
|
255 |
}}
|
256 |
.gradio-container {{
|
257 |
+
font-family: Inter, ui-sans-serif, -apple-system, Segoe UI, Roboto, Cairo, Noto Sans, Arial;
|
258 |
+
background: var(--bg);
|
259 |
+
color: var(--text);
|
260 |
+
}}
|
261 |
+
#hero {{
|
262 |
+
background: linear-gradient(135deg, var(--brand) 0%, #2f2e8b 40%, #1b1a3a 100%);
|
263 |
+
border-radius: 18px;
|
264 |
+
padding: 18px 20px;
|
265 |
+
color: white;
|
266 |
+
box-shadow: 0 10px 30px rgba(0,0,0,.35);
|
267 |
}}
|
268 |
+
#hero h1 {{
|
269 |
+
margin: 0 0 6px 0;
|
270 |
+
font-size: 20px;
|
271 |
+
font-weight: 700;
|
272 |
+
letter-spacing: .2px;
|
273 |
+
}}
|
274 |
+
#hero p {{
|
275 |
+
margin: 0;
|
276 |
+
opacity: .95;
|
277 |
+
}}
|
278 |
+
.gr-tabitem, .gr-block.gr-group, .gr-panel {{
|
279 |
+
background: var(--panel);
|
280 |
+
border-radius: 16px !important;
|
281 |
+
box-shadow: 0 6px 18px rgba(0,0,0,.28);
|
282 |
+
border: 1px solid rgba(255,255,255,.04);
|
283 |
+
}}
|
284 |
+
.gr-button {{
|
285 |
+
border-radius: 12px !important;
|
286 |
+
border: 1px solid rgba(255,255,255,.08) !important;
|
287 |
+
}}
|
288 |
+
.gradio-container .tabs .tab-nav button.selected {{
|
289 |
+
background: rgba(255,255,255,.06);
|
290 |
+
border-radius: 12px;
|
291 |
+
border: 1px solid rgba(255,255,255,.08);
|
292 |
+
}}
|
293 |
+
.badge {{
|
294 |
+
display:inline-block; padding:2px 8px; border-radius:999px;
|
295 |
+
background: rgba(255,255,255,.12); color:#fff; font-size:12px
|
296 |
}}
|
|
|
297 |
"""
|
298 |
|
299 |
+
with gr.Blocks(css=THEME_CSS, title=APP_TITLE, analytics_enabled=False) as demo:
|
300 |
with gr.Row():
|
301 |
+
gr.HTML(f"""
|
302 |
+
<div id="hero">
|
303 |
+
<h1>{APP_TITLE}</h1>
|
304 |
+
<p>{APP_TAGLINE}</p>
|
305 |
+
<div style="margin-top:8px"><span class="badge">ZeroGPU</span> <span class="badge">Auto-trim ≤ {MAX_SECS}s</span> <span class="badge">Downscale {TARGET_H}p</span></div>
|
306 |
+
</div>
|
307 |
+
""")
|
308 |
|
309 |
with gr.Tabs():
|
310 |
with gr.Tab("🎬 Single Clip"):
|
311 |
with gr.Group():
|
312 |
+
project_name = gr.Textbox(
|
313 |
+
label="Project name (optional)",
|
314 |
+
placeholder="Enter a short label for this clip"
|
315 |
+
)
|
316 |
with gr.Row():
|
317 |
v_single = gr.Video(label=f"Video (≤ ~{MAX_SECS}s recommended)")
|
318 |
+
p_single = gr.Textbox(
|
319 |
+
label="Sound prompt (optional)",
|
320 |
+
placeholder="e.g., soft footsteps on wood, light rain, indoor reverb"
|
321 |
+
)
|
322 |
with gr.Row():
|
323 |
+
want_mux_single = gr.Checkbox(value=True, label="Mux foley into MP4 output")
|
324 |
run_btn = gr.Button("Generate", variant="primary")
|
325 |
with gr.Row():
|
326 |
out_audio = gr.Audio(label=f"Generated Foley ({SR//1000} kHz WAV)", type="filepath")
|
327 |
out_mux = gr.Video(label="Video + Foley (MP4)", visible=True)
|
328 |
status_md = gr.Markdown()
|
329 |
+
history_table = gr.Dataframe(
|
330 |
+
headers=["Step", "Note"], datatype=["str","str"],
|
331 |
+
interactive=False, wrap=True, label="Activity", height=200
|
332 |
+
)
|
333 |
|
334 |
run_btn.click(
|
335 |
single_generate,
|
|
|
343 |
want_mux_b = gr.Checkbox(value=True, label="Mux each output")
|
344 |
go_b = gr.Button("Run batch-lite")
|
345 |
batch_status = gr.Markdown()
|
346 |
+
batch_log = gr.Dataframe(
|
347 |
+
headers=["Step","Note"], datatype=["str","str"],
|
348 |
+
interactive=False, wrap=True, label="Batch Log", height=240
|
349 |
+
)
|
350 |
|
351 |
go_b.click(
|
352 |
batch_lite_generate,
|
|
|
354 |
outputs=[batch_status, batch_log]
|
355 |
)
|
356 |
|
357 |
+
with gr.Tab("ℹ️ Tips"):
|
358 |
gr.Markdown(f"""
|
359 |
+
**Usage guidelines**
|
360 |
+
- Keep clips short (the tool trims to **≤ {MAX_SECS}s** automatically).
|
361 |
+
- The video is downscaled to **{TARGET_H}p** to fit the ZeroGPU time window.
|
362 |
+
- If you see a quota message, just try again later (ZeroGPU limits GPU minutes per visitor).
|
363 |
+
|
364 |
+
**Outputs**
|
365 |
+
- WAV is **{SR//1000} kHz** stereo.
|
366 |
+
- Enable **Mux** to get a ready MP4 with the generated foley track.
|
|
|
367 |
""")
|
368 |
|
369 |
demo.queue(max_size=24).launch()
|