ziqiangao committed on
Commit
0a346a3
·
1 Parent(s): 938bd0e

Add Multi Singer mode, since the lead/back vocal model just uses the center

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -243,7 +243,7 @@ def download_wav(url, target_fs=None):
243
  return audio, sr
244
 
245
  # Smart mode workflow
246
- def smart_mode_process(input_file, api_key):
247
  p = gr.Progress()
248
  import shutil
249
 
@@ -305,6 +305,7 @@ def smart_mode_process(input_file, api_key):
305
  api_key, open(other_buf.name, 'rb').read(), os.path.basename(other_buf.name), sep_type=49, output_format=2, addopt1=3, addopt2=1
306
  )
307
  os.unlink(other_buf.name)
 
308
  vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
309
  vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
310
  instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
@@ -312,7 +313,7 @@ def smart_mode_process(input_file, api_key):
312
  # Step 5: Phantom center for lead vocals
313
  p((5,7), "Distributing Front Vocal Channels")
314
  vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
315
- sf.write(vl_buf.name, vocals_lead, fs, subtype='FLOAT')
316
  vl_buf.close()
317
  _, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
318
  os.unlink(vl_buf.name)
@@ -327,8 +328,13 @@ def smart_mode_process(input_file, api_key):
327
  out_R = match_len(FR_vl, length) + match_len(instr[:,1], length)
328
  out_C = match_len(FC_vl, length)
329
  out_LFE = match_len(bass, length)
330
- SL = match_len(vocals_back[:,0], length) + match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
331
- SR = match_len(vocals_back[:,1], length) + match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
 
 
 
 
 
332
 
333
  # Step 7: Encode to 5.1 OGG
334
  p((7,7), "Processing Step 7, Encoding")
@@ -365,6 +371,7 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
365
  # Smart mode section
366
  with gr.Column(visible=False) as smart_section:
367
  api_key = gr.Textbox(label="MVSep API Key", type="password")
 
368
  smart_btn = gr.Button("Start")
369
  smart_out = gr.File(label="Output from Smart Mode")
370
 
@@ -385,7 +392,7 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
385
 
386
  # Button functions
387
  btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out], concurrency_limit=10)
388
- smart_btn.click(fn=smart_mode_process, inputs=[inp, api_key], outputs=[smart_out], concurrency_limit=20)
389
 
390
  if __name__ == "__main__":
391
  demo.launch(show_error=True)
 
243
  return audio, sr
244
 
245
  # Smart mode workflow
246
+ def smart_mode_process(input_file, api_key, multi_singer=False):
247
  p = gr.Progress()
248
  import shutil
249
 
 
305
  api_key, open(other_buf.name, 'rb').read(), os.path.basename(other_buf.name), sep_type=49, output_format=2, addopt1=3, addopt2=1
306
  )
307
  os.unlink(other_buf.name)
308
+ vocals_full, _ = download_wav(karaoke_resp['files'][0]['url'], target_fs=fs)
309
  vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
310
  vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
311
  instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
 
313
  # Step 5: Phantom center for lead vocals
314
  p((5,7), "Distributing Front Vocal Channels")
315
  vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
316
+ sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
317
  vl_buf.close()
318
  _, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
319
  os.unlink(vl_buf.name)
 
328
  out_R = match_len(FR_vl, length) + match_len(instr[:,1], length)
329
  out_C = match_len(FC_vl, length)
330
  out_LFE = match_len(bass, length)
331
+ if multi_singer:
332
+ SL = match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
333
+ SR = match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
334
+ else:
335
+ SL = match_len(vocals_back[:,0], length) + match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
336
+ SR = match_len(vocals_back[:,1], length) + match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
337
+
338
 
339
  # Step 7: Encode to 5.1 OGG
340
  p((7,7), "Processing Step 7, Encoding")
 
371
  # Smart mode section
372
  with gr.Column(visible=False) as smart_section:
373
  api_key = gr.Textbox(label="MVSep API Key", type="password")
374
+ multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
375
  smart_btn = gr.Button("Start")
376
  smart_out = gr.File(label="Output from Smart Mode")
377
 
 
392
 
393
  # Button functions
394
  btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out], concurrency_limit=10)
395
+ smart_btn.click(fn=smart_mode_process, inputs=[inp, api_key, multi_singer], outputs=[smart_out], concurrency_limit=20)
396
 
397
  if __name__ == "__main__":
398
  demo.launch(show_error=True)