ziqiangao committed on
Commit 1da61b3 · 1 Parent(s): a37de4d

correct process

Files changed (1)
  1. app.py +13 -15
app.py CHANGED
@@ -262,7 +262,6 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     if data.ndim != 2:
         raise gr.Error("Expected stereo input")
     L, R = data[:, 0], data[:, 1]
-    stereo = np.column_stack([L, R])
 
     # Step 1: LFE from lowpass
     p((1, 8), "Processing LFE")
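
Note: the hunk above only touches the step label; the LFE itself comes from a lowpass of the stereo input. A minimal sketch of that idea, assuming a 4th-order Butterworth at 120 Hz via scipy (the cutoff and order are assumptions, not values taken from app.py):

import numpy as np
from scipy.signal import butter, sosfiltfilt

def lowpass_lfe_sketch(left: np.ndarray, right: np.ndarray, fs: int, cutoff_hz: float = 120.0) -> np.ndarray:
    # Sum the stereo pair to mono, then keep only the low band for the LFE channel.
    mono = 0.5 * (left + right)
    sos = butter(4, cutoff_hz, btype="lowpass", fs=fs, output="sos")  # assumed cutoff/order
    return sosfiltfilt(sos, mono)  # zero-phase, so the LFE stays time-aligned
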
@@ -285,19 +284,9 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     crowd, _ = download_wav(crowd_resp['files'][0]['url'], target_fs=fs)
     other_after_crowd, _ = download_wav(crowd_resp['files'][1]['url'], target_fs=fs)
 
-    # Step 3: Reverb using SoX (like regular mode)
-    p((3, 8), "Applying Reverb")
 
-    # Use the same reverb_args as 'open' preset from create_5_1_surround
-    reverb_args = ['70', '40', '100', '95', '10', '0'] # music preset
-
-    # Apply reverb to left and right channels separately
-    reverb_L = apply_reverb_wet_only(other_after_crowd[:, 0], fs, reverb_args)
-    reverb_R = apply_reverb_wet_only(other_after_crowd[:, 1], fs, reverb_args)
-    reverb = np.column_stack([reverb_L, reverb_R])
-
-    # Step 4: Speech, music, SFX separation from 'other_after_crowd'
-    p((4, 8), "Separating Speech, Music, and SFX")
+    # Step 3: Speech, music, SFX separation from 'other_after_crowd'
+    p((3, 8), "Separating Speech, Music, and SFX")
     demucs_input_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
     sf.write(demucs_input_buf.name, other_after_crowd, fs, format='FLAC', subtype='PCM_16')
     demucs_input_buf.close()
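
The separation itself happens server-side; the stems come back as URLs and are fetched with download_wav at the session sample rate. A hedged sketch of what such a helper could look like (download_wav_sketch is a hypothetical stand-in, not the helper defined in app.py):

import io
from math import gcd

import requests
import soundfile as sf
from scipy.signal import resample_poly

def download_wav_sketch(url: str, target_fs: int):
    # Fetch the rendered stem and decode it in memory.
    resp = requests.get(url, timeout=120)
    resp.raise_for_status()
    data, fs = sf.read(io.BytesIO(resp.content), always_2d=True)
    # Resample to the session rate if the service returned something else.
    if fs != target_fs:
        g = gcd(int(target_fs), int(fs))
        data = resample_poly(data, target_fs // g, fs // g, axis=0)
        fs = target_fs
    return data, fs
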
@@ -312,6 +301,14 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     sfx, _ = download_wav(demucs_resp['files'][2]['url'], target_fs=fs)
     music, _ = download_wav(demucs_resp['files'][1]['url'], target_fs=fs)
 
+    # Step 4: Apply Reverb to the 'music' stem
+    p((4, 8), "Applying Reverb")
+    reverb_args = ['70', '40', '100', '95', '10', '0'] # music preset
+    reverb_L = apply_reverb_wet_only(music[:, 0], fs, reverb_args)
+    reverb_R = apply_reverb_wet_only(music[:, 1], fs, reverb_args)
+    reverb = np.column_stack([reverb_L, reverb_R])
+
+
     # Step 5: Vocal Extraction from music
     p((5, 8), "Extracting Vocals")
     music_buf = tempfile.NamedTemporaryFile(suffix=".flac", delete=False)
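
The reverb is now rendered per channel from the 'music' stem rather than from the pre-separation mix. The six reverb_args values line up with SoX's reverb effect parameters (reverberance 70, HF damping 40, room scale 100, stereo depth 95, pre-delay 10 ms, wet gain 0 dB). A minimal sketch of a wet-only render by shelling out to sox, assuming that is roughly what apply_reverb_wet_only does (the real helper is defined elsewhere in app.py):

import os
import subprocess
import tempfile

import numpy as np
import soundfile as sf

def apply_reverb_wet_only_sketch(mono: np.ndarray, fs: int, reverb_args: list) -> np.ndarray:
    # Render only the reverb tail by passing --wet-only to SoX's reverb effect.
    src = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    dst = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    src.close(); dst.close()
    try:
        sf.write(src.name, mono, fs)
        subprocess.run(["sox", src.name, dst.name, "reverb", "--wet-only", *reverb_args], check=True)
        wet, _ = sf.read(dst.name)
        return wet
    finally:
        os.unlink(src.name)
        os.unlink(dst.name)
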
@@ -338,6 +335,9 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     _, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
     os.unlink(vl_buf.name)
 
+    # Mix dialog into the centre channel
+    FC_vl += dialog[:, 0] if dialog.ndim == 2 else dialog
+
     # Step 7: Mapping and stacking
     p((7, 8), "Mapping Channels and Encoding")
     def match_len(x, length): return np.pad(x, (0, length - len(x)))
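
The new centre mix (FC_vl += dialog ...) assumes dialog and FC_vl have identical lengths. A hedged, length-safe variant in the spirit of the match_len helper used further down (mix_into is a hypothetical name, not something defined in app.py):

import numpy as np

def mix_into(base: np.ndarray, extra: np.ndarray) -> np.ndarray:
    # Zero-pad whichever signal is shorter so the element-wise add cannot fail.
    if extra.ndim == 2:
        extra = extra[:, 0]  # same left-channel pick as in the diff above
    n = max(len(base), len(extra))
    out = np.zeros(n, dtype=np.result_type(base, extra))
    out[:len(base)] += base
    out[:len(extra)] += extra
    return out

# e.g. FC_vl = mix_into(FC_vl, dialog)
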
@@ -354,14 +354,12 @@ def smart_mode_process(input_file, api_key, multi_singer=False):
     SL = match_len(reverb[:, 0], length)
     SR = match_len(reverb[:, 1], length)
 
-    # Optional: if multi_singer, don’t include backing vocals
     if not multi_singer:
         SL += match_len(vocals_back[:, 0], length)
         SR += match_len(vocals_back[:, 1], length)
     SL += match_len(crowd[:, 0], length)
     SR += match_len(crowd[:, 1], length)
 
-    # Final multichannel stack
     multich = np.column_stack([out_L, out_R, out_C, out_LFE, SL, SR])
 
     out_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
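
The final stack follows the conventional 5.1 WAV channel order (FL, FR, FC, LFE, SL, SR). A self-contained sketch of the assembly and the multichannel write, assuming soundfile is used for the output as it is for the intermediate FLAC buffers; the silent placeholder signals and 48 kHz rate are illustrative only:

import numpy as np
import soundfile as sf

fs = 48000                      # illustrative sample rate
n = fs * 2                      # two seconds of silence as placeholder stems
FL = FR = FC = LFE = SL = SR = np.zeros(n)
multich = np.column_stack([FL, FR, FC, LFE, SL, SR])  # shape: (frames, 6)
sf.write("surround_sketch.wav", multich, fs, subtype="PCM_16")
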
 