Bils committed on
Commit
deede6d
·
verified ·
1 Parent(s): 33f355d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -48
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, json, tempfile, subprocess, shutil, time, uuid
2
  from pathlib import Path
3
  from typing import Optional, Tuple, List
4
 
@@ -15,12 +15,14 @@ OUT_DIR = ROOT / "outputs"
15
  ASSETS = ROOT / "assets"
16
  ASSETS.mkdir(exist_ok=True)
17
 
18
- BILS_BRAND = os.environ.get("BILS_BRAND", "Bilsimaging · Foley Studio")
19
- PRIMARY_COLOR = os.environ.get("PRIMARY_COLOR", "#6B5BFF") # UI accent
 
 
20
 
21
- MAX_SECS = int(os.environ.get("MAX_SECS", "22")) # ZeroGPU-friendly clip length
22
  TARGET_H = int(os.environ.get("TARGET_H", "480")) # downscale target height
23
- SR = int(os.environ.get("TARGET_SR", "48000")) # output WAV sample rate
24
 
25
  def sh(cmd: str):
26
  print(">>", cmd)
@@ -38,20 +40,19 @@ def ffprobe_duration(path: str) -> float:
38
 
39
  def _clone_without_lfs():
40
  """
41
- Try a shallow clone while skipping LFS smudge so demo MP4s aren't pulled.
42
- Falls back to sparse checkout with needed paths only.
43
  """
44
  if REPO_DIR.exists():
45
  return
46
 
47
- # Attempt 1: shallow clone, disable LFS filters
48
  try:
49
  sh(
50
  "GIT_LFS_SKIP_SMUDGE=1 "
51
  "git -c filter.lfs.smudge= -c filter.lfs.required=false "
52
  f"clone --depth 1 https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git {REPO_DIR}"
53
  )
54
- # Optional: remove assets folder if present
55
  assets = REPO_DIR / "assets"
56
  if assets.exists():
57
  shutil.rmtree(assets, ignore_errors=True)
@@ -67,8 +68,6 @@ def _clone_without_lfs():
67
  "remote add origin https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git"
68
  )
69
  sh(f"git -C {REPO_DIR} config core.sparseCheckout true")
70
-
71
- # Choose only essential paths
72
  sparse_file = REPO_DIR / ".git" / "info" / "sparse-checkout"
73
  sparse_file.parent.mkdir(parents=True, exist_ok=True)
74
  sparse_file.write_text("\n".join([
@@ -79,8 +78,7 @@ def _clone_without_lfs():
79
  "LICENSE",
80
  "README.md",
81
  ]) + "\n")
82
-
83
- # Branch might be main; change to master if needed
84
  try:
85
  sh(f"git -C {REPO_DIR} fetch --depth 1 origin main")
86
  sh(f"git -C {REPO_DIR} checkout main")
@@ -89,9 +87,8 @@ def _clone_without_lfs():
89
  sh(f"git -C {REPO_DIR} checkout master")
90
 
91
  def prepare_once():
92
- """Clone code (skipping LFS), download weights, set env, prepare dirs."""
93
  _clone_without_lfs()
94
-
95
  WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
96
  snapshot_download(
97
  repo_id="tencent/HunyuanVideo-Foley",
@@ -100,7 +97,6 @@ def prepare_once():
100
  repo_type="model",
101
  )
102
  os.environ["HIFI_FOLEY_MODEL_PATH"] = str(WEIGHTS_DIR)
103
-
104
  CACHE_DIR.mkdir(exist_ok=True)
105
  OUT_DIR.mkdir(exist_ok=True)
106
 
@@ -120,7 +116,6 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
120
  temp_dir = Path(tempfile.mkdtemp(prefix="pre_"))
121
  trimmed = temp_dir / "trim.mp4"
122
  processed = temp_dir / "proc.mp4"
123
-
124
  trim_args = ["-t", str(MAX_SECS)] if dur > MAX_SECS else []
125
 
126
  # Normalize container & remove audio
@@ -133,7 +128,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
133
  f"\"{trimmed}\""
134
  ]))
135
 
136
- # Downscale to TARGET_H; ensure mod2 width, baseline profile for compatibility
137
  vf = f"scale=-2:{TARGET_H}:flags=bicubic"
138
  sh(" ".join([
139
  "ffmpeg", "-y", "-i", f"\"{trimmed}\"",
@@ -150,7 +145,7 @@ def preprocess_video(in_path: str) -> Tuple[str, float]:
150
  return str(processed), final_dur
151
 
152
  # ========= Inference (ZeroGPU) =========
153
- @spaces.GPU(duration=240) # ~4 minutes per call (fits ZeroGPU window)
154
  def run_model(video_path: str, prompt_text: str) -> str:
155
  """
156
  Call Tencent's infer.py on GPU and return a 48 kHz WAV path.
@@ -207,16 +202,16 @@ def single_generate(video: str, prompt: str, want_mux: bool, project_name: str)
207
  try:
208
  if not video:
209
  return None, None, "⚠️ Please upload a video.", history
210
- history.append(["Preprocess", "Downscaling / trimming"])
211
  pre_path, final_dur = preprocess_video(video)
212
- history.append(["Inference", "Generating foley on GPU…"])
213
  wav = run_model(pre_path, prompt or "")
214
  muxed = None
215
  if want_mux:
216
- history.append(["Mux", "Combining foley with video"])
217
  muxed = mux_audio_with_video(pre_path, wav)
218
- history.append(["Done", f"OK · Duration ~{final_dur:.1f}s"])
219
- return wav, muxed, f"✅ Finished ({final_dur:.1f}s)", history
220
  except Exception as e:
221
  history.append(["Error", str(e)])
222
  return None, None, f"❌ {type(e).__name__}: {e}", history
@@ -234,53 +229,107 @@ def batch_lite_generate(files: List[str], prompt: str, want_mux: bool) -> Tuple[
234
  try:
235
  log.append([f"Preprocess {i}", Path(f).name])
236
  pre, final_dur = preprocess_video(f)
237
- log.append([f"Run {i}", f"GPU infer ~{final_dur:.1f}s"])
238
  wav = run_model(pre, prompt or "")
239
  muxed = mux_audio_with_video(pre, wav) if want_mux else None
240
  outputs.append((wav, muxed))
241
  log.append([f"Done {i}", "OK"])
242
  except Exception as e:
243
  log.append([f"Error {i}", str(e)])
 
244
  manifest = OUT_DIR / f"batchlite_{uuid.uuid4().hex[:6]}.json"
245
  manifest.write_text(json.dumps(
246
  [{"wav": w, "video": v} for (w, v) in outputs], ensure_ascii=False, indent=2
247
  ))
248
  return f"✅ Batch-lite finished · items: {len(outputs)}", log
249
 
250
- # ========= UI =========
251
  THEME_CSS = f"""
252
  :root {{
253
  --brand: {PRIMARY_COLOR};
 
 
 
 
 
254
  }}
255
  .gradio-container {{
256
- font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Cairo, Noto Sans, Arial, "Apple Color Emoji", "Segoe UI Emoji";
 
 
 
 
 
 
 
 
 
257
  }}
258
- #brandbar {{
259
- background: linear-gradient(90deg, var(--brand), #222);
260
- color: white; padding: 12px 16px; border-radius: 12px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  }}
262
- #brandbar strong {{ letter-spacing: .3px; }}
263
  """
264
 
265
- with gr.Blocks(css=THEME_CSS, title="Foley Studio · ZeroGPU") as demo:
266
  with gr.Row():
267
- gr.HTML(f'<div id="brandbar"><strong>{BILS_BRAND}</strong> — HunyuanVideo-Foley on ZeroGPU</div>')
 
 
 
 
 
 
268
 
269
  with gr.Tabs():
270
  with gr.Tab("🎬 Single Clip"):
271
  with gr.Group():
272
- project_name = gr.Textbox(label="Project name (optional)", placeholder="e.g., JawharaFM Teaser 09-2025")
 
 
 
273
  with gr.Row():
274
  v_single = gr.Video(label=f"Video (≤ ~{MAX_SECS}s recommended)")
275
- p_single = gr.Textbox(label="Sound prompt (optional)", placeholder="e.g., soft footsteps, indoor reverb, light rain")
 
 
 
276
  with gr.Row():
277
- want_mux_single = gr.Checkbox(value=True, label="Mux foley back into video (MP4)")
278
  run_btn = gr.Button("Generate", variant="primary")
279
  with gr.Row():
280
  out_audio = gr.Audio(label=f"Generated Foley ({SR//1000} kHz WAV)", type="filepath")
281
  out_mux = gr.Video(label="Video + Foley (MP4)", visible=True)
282
  status_md = gr.Markdown()
283
- history_table = gr.Dataframe(headers=["Step", "Note"], datatype=["str","str"], interactive=False, wrap=True, label="Activity")
 
 
 
284
 
285
  run_btn.click(
286
  single_generate,
@@ -294,7 +343,10 @@ with gr.Blocks(css=THEME_CSS, title="Foley Studio · ZeroGPU") as demo:
294
  want_mux_b = gr.Checkbox(value=True, label="Mux each output")
295
  go_b = gr.Button("Run batch-lite")
296
  batch_status = gr.Markdown()
297
- batch_log = gr.Dataframe(headers=["Step","Note"], datatype=["str","str"], interactive=False, wrap=True, label="Batch Log")
 
 
 
298
 
299
  go_b.click(
300
  batch_lite_generate,
@@ -302,17 +354,16 @@ with gr.Blocks(css=THEME_CSS, title="Foley Studio · ZeroGPU") as demo:
302
  outputs=[batch_status, batch_log]
303
  )
304
 
305
- with gr.Tab("⚙️ Settings / Tips"):
306
  gr.Markdown(f"""
307
- **ZeroGPU Tips**
308
- - Clips are trimmed to **≤ {MAX_SECS}s** automatically.
309
- - Video downscaled to **{TARGET_H}p** to fit the GPU time window.
310
- - If you hit a quota error, try again later (ZeroGPU limits minutes per visitor).
311
-
312
- **Branding via ENV**
313
- - `BILS_BRAND` header text
314
- - `PRIMARY_COLOR` hex color
315
- - `MAX_SECS`, `TARGET_H`, `TARGET_SR` → processing behavior
316
  """)
317
 
318
  demo.queue(max_size=24).launch()
 
1
+ import os, json, tempfile, subprocess, shutil, uuid
2
  from pathlib import Path
3
  from typing import Optional, Tuple, List
4
 
 
15
  ASSETS = ROOT / "assets"
16
  ASSETS.mkdir(exist_ok=True)
17
 
18
+ # These env vars are still read at startup; they are simply no longer mentioned in the UI
19
+ APP_TITLE = os.environ.get("APP_TITLE", "Foley Studio · ZeroGPU")
20
+ APP_TAGLINE = os.environ.get("APP_TAGLINE", "Generate scene-true foley for short clips (ZeroGPU-ready).")
21
+ PRIMARY_COLOR = os.environ.get("PRIMARY_COLOR", "#6B5BFF")
22
 
23
+ MAX_SECS = int(os.environ.get("MAX_SECS", "22")) # ZeroGPU-friendly clip length
24
  TARGET_H = int(os.environ.get("TARGET_H", "480")) # downscale target height
25
+ SR = int(os.environ.get("TARGET_SR", "48000")) # WAV sample rate
26
 
27
  def sh(cmd: str):
28
  print(">>", cmd)
 
40
 
41
  def _clone_without_lfs():
42
  """
43
+ Clone repo while skipping LFS smudge to avoid demo video downloads.
44
+ Falls back to sparse checkout with only essential paths.
45
  """
46
  if REPO_DIR.exists():
47
  return
48
 
49
+ # Attempt 1: shallow clone with LFS disabled
50
  try:
51
  sh(
52
  "GIT_LFS_SKIP_SMUDGE=1 "
53
  "git -c filter.lfs.smudge= -c filter.lfs.required=false "
54
  f"clone --depth 1 https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git {REPO_DIR}"
55
  )
 
56
  assets = REPO_DIR / "assets"
57
  if assets.exists():
58
  shutil.rmtree(assets, ignore_errors=True)
 
68
  "remote add origin https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley.git"
69
  )
70
  sh(f"git -C {REPO_DIR} config core.sparseCheckout true")
 
 
71
  sparse_file = REPO_DIR / ".git" / "info" / "sparse-checkout"
72
  sparse_file.parent.mkdir(parents=True, exist_ok=True)
73
  sparse_file.write_text("\n".join([
 
78
  "LICENSE",
79
  "README.md",
80
  ]) + "\n")
81
+ # Try main, fallback to master
 
82
  try:
83
  sh(f"git -C {REPO_DIR} fetch --depth 1 origin main")
84
  sh(f"git -C {REPO_DIR} checkout main")
 
87
  sh(f"git -C {REPO_DIR} checkout master")
88
 
89
  def prepare_once():
90
+ """Clone code (skip LFS), download weights, set env, prepare dirs."""
91
  _clone_without_lfs()
 
92
  WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)
93
  snapshot_download(
94
  repo_id="tencent/HunyuanVideo-Foley",
 
97
  repo_type="model",
98
  )
99
  os.environ["HIFI_FOLEY_MODEL_PATH"] = str(WEIGHTS_DIR)
 
100
  CACHE_DIR.mkdir(exist_ok=True)
101
  OUT_DIR.mkdir(exist_ok=True)
102
 
 
116
  temp_dir = Path(tempfile.mkdtemp(prefix="pre_"))
117
  trimmed = temp_dir / "trim.mp4"
118
  processed = temp_dir / "proc.mp4"
 
119
  trim_args = ["-t", str(MAX_SECS)] if dur > MAX_SECS else []
120
 
121
  # Normalize container & remove audio
 
128
  f"\"{trimmed}\""
129
  ]))
130
 
131
+ # Downscale to TARGET_H; ensure mod2 width, baseline profile
132
  vf = f"scale=-2:{TARGET_H}:flags=bicubic"
133
  sh(" ".join([
134
  "ffmpeg", "-y", "-i", f"\"{trimmed}\"",
 
145
  return str(processed), final_dur
146
 
147
  # ========= Inference (ZeroGPU) =========
148
+ @spaces.GPU(duration=240) # ~4 minutes per call (ZeroGPU window)
149
  def run_model(video_path: str, prompt_text: str) -> str:
150
  """
151
  Call Tencent's infer.py on GPU and return a 48 kHz WAV path.
 
202
  try:
203
  if not video:
204
  return None, None, "⚠️ Please upload a video.", history
205
+ history.append(["Preprocess", "Downscaling & trimming"])
206
  pre_path, final_dur = preprocess_video(video)
207
+ history.append(["Inference", "Running on ZeroGPU"])
208
  wav = run_model(pre_path, prompt or "")
209
  muxed = None
210
  if want_mux:
211
+ history.append(["Mux", "Merging foley with video"])
212
  muxed = mux_audio_with_video(pre_path, wav)
213
+ history.append(["Done", f"OK · ~{final_dur:.1f}s"])
214
+ return wav, muxed, f"✅ Completed (~{final_dur:.1f}s)", history
215
  except Exception as e:
216
  history.append(["Error", str(e)])
217
  return None, None, f"❌ {type(e).__name__}: {e}", history
 
229
  try:
230
  log.append([f"Preprocess {i}", Path(f).name])
231
  pre, final_dur = preprocess_video(f)
232
+ log.append([f"Run {i}", f"ZeroGPU ~{final_dur:.1f}s"])
233
  wav = run_model(pre, prompt or "")
234
  muxed = mux_audio_with_video(pre, wav) if want_mux else None
235
  outputs.append((wav, muxed))
236
  log.append([f"Done {i}", "OK"])
237
  except Exception as e:
238
  log.append([f"Error {i}", str(e)])
239
+
240
  manifest = OUT_DIR / f"batchlite_{uuid.uuid4().hex[:6]}.json"
241
  manifest.write_text(json.dumps(
242
  [{"wav": w, "video": v} for (w, v) in outputs], ensure_ascii=False, indent=2
243
  ))
244
  return f"✅ Batch-lite finished · items: {len(outputs)}", log
245
 
246
+ # ========= UI (refreshed design) =========
247
  THEME_CSS = f"""
248
  :root {{
249
  --brand: {PRIMARY_COLOR};
250
+ --bg: #0f1120;
251
+ --panel: #181a2e;
252
+ --text: #edf0ff;
253
+ --muted: #b7bce3;
254
+ --card: #15172a;
255
  }}
256
  .gradio-container {{
257
+ font-family: Inter, ui-sans-serif, -apple-system, Segoe UI, Roboto, Cairo, Noto Sans, Arial;
258
+ background: var(--bg);
259
+ color: var(--text);
260
+ }}
261
+ #hero {{
262
+ background: linear-gradient(135deg, var(--brand) 0%, #2f2e8b 40%, #1b1a3a 100%);
263
+ border-radius: 18px;
264
+ padding: 18px 20px;
265
+ color: white;
266
+ box-shadow: 0 10px 30px rgba(0,0,0,.35);
267
  }}
268
+ #hero h1 {{
269
+ margin: 0 0 6px 0;
270
+ font-size: 20px;
271
+ font-weight: 700;
272
+ letter-spacing: .2px;
273
+ }}
274
+ #hero p {{
275
+ margin: 0;
276
+ opacity: .95;
277
+ }}
278
+ .gr-tabitem, .gr-block.gr-group, .gr-panel {{
279
+ background: var(--panel);
280
+ border-radius: 16px !important;
281
+ box-shadow: 0 6px 18px rgba(0,0,0,.28);
282
+ border: 1px solid rgba(255,255,255,.04);
283
+ }}
284
+ .gr-button {{
285
+ border-radius: 12px !important;
286
+ border: 1px solid rgba(255,255,255,.08) !important;
287
+ }}
288
+ .gradio-container .tabs .tab-nav button.selected {{
289
+ background: rgba(255,255,255,.06);
290
+ border-radius: 12px;
291
+ border: 1px solid rgba(255,255,255,.08);
292
+ }}
293
+ .badge {{
294
+ display:inline-block; padding:2px 8px; border-radius:999px;
295
+ background: rgba(255,255,255,.12); color:#fff; font-size:12px
296
  }}
 
297
  """
298
 
299
+ with gr.Blocks(css=THEME_CSS, title=APP_TITLE, analytics_enabled=False) as demo:
300
  with gr.Row():
301
+ gr.HTML(f"""
302
+ <div id="hero">
303
+ <h1>{APP_TITLE}</h1>
304
+ <p>{APP_TAGLINE}</p>
305
+ <div style="margin-top:8px"><span class="badge">ZeroGPU</span> <span class="badge">Auto-trim ≤ {MAX_SECS}s</span> <span class="badge">Downscale {TARGET_H}p</span></div>
306
+ </div>
307
+ """)
308
 
309
  with gr.Tabs():
310
  with gr.Tab("🎬 Single Clip"):
311
  with gr.Group():
312
+ project_name = gr.Textbox(
313
+ label="Project name (optional)",
314
+ placeholder="Enter a short label for this clip"
315
+ )
316
  with gr.Row():
317
  v_single = gr.Video(label=f"Video (≤ ~{MAX_SECS}s recommended)")
318
+ p_single = gr.Textbox(
319
+ label="Sound prompt (optional)",
320
+ placeholder="e.g., soft footsteps on wood, light rain, indoor reverb"
321
+ )
322
  with gr.Row():
323
+ want_mux_single = gr.Checkbox(value=True, label="Mux foley into MP4 output")
324
  run_btn = gr.Button("Generate", variant="primary")
325
  with gr.Row():
326
  out_audio = gr.Audio(label=f"Generated Foley ({SR//1000} kHz WAV)", type="filepath")
327
  out_mux = gr.Video(label="Video + Foley (MP4)", visible=True)
328
  status_md = gr.Markdown()
329
+ history_table = gr.Dataframe(
330
+ headers=["Step", "Note"], datatype=["str","str"],
331
+ interactive=False, wrap=True, label="Activity", height=200
332
+ )
333
 
334
  run_btn.click(
335
  single_generate,
 
343
  want_mux_b = gr.Checkbox(value=True, label="Mux each output")
344
  go_b = gr.Button("Run batch-lite")
345
  batch_status = gr.Markdown()
346
+ batch_log = gr.Dataframe(
347
+ headers=["Step","Note"], datatype=["str","str"],
348
+ interactive=False, wrap=True, label="Batch Log", height=240
349
+ )
350
 
351
  go_b.click(
352
  batch_lite_generate,
 
354
  outputs=[batch_status, batch_log]
355
  )
356
 
357
+ with gr.Tab("ℹ️ Tips"):
358
  gr.Markdown(f"""
359
+ **Usage guidelines**
360
+ - Keep clips short (the tool trims to **≤ {MAX_SECS}s** automatically).
361
+ - The video is downscaled to **{TARGET_H}p** to fit the ZeroGPU time window.
362
+ - If you see a quota message, just try again later (ZeroGPU limits GPU minutes per visitor).
363
+
364
+ **Outputs**
365
+ - WAV is **{SR//1000} kHz** stereo.
366
+ - Enable **Mux** to get a ready MP4 with the generated foley track.
 
367
  """)
368
 
369
  demo.queue(max_size=24).launch()