Spaces:
Running
Running
ziqiangao
committed on
Commit
·
0a346a3
1
Parent(s):
938bd0e
Add Multi Singer mode, since the lead/back vocal model just uses the center
Browse files
app.py
CHANGED
@@ -243,7 +243,7 @@ def download_wav(url, target_fs=None):
|
|
243 |
return audio, sr
|
244 |
|
245 |
# Smart mode workflow
|
246 |
-
def smart_mode_process(input_file, api_key):
|
247 |
p = gr.Progress()
|
248 |
import shutil
|
249 |
|
@@ -305,6 +305,7 @@ def smart_mode_process(input_file, api_key):
|
|
305 |
api_key, open(other_buf.name, 'rb').read(), os.path.basename(other_buf.name), sep_type=49, output_format=2, addopt1=3, addopt2=1
|
306 |
)
|
307 |
os.unlink(other_buf.name)
|
|
|
308 |
vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
|
309 |
vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
|
310 |
instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
|
@@ -312,7 +313,7 @@ def smart_mode_process(input_file, api_key):
|
|
312 |
# Step 5: Phantom center for lead vocals
|
313 |
p((5,7), "Distributing Front Vocal Channels")
|
314 |
vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
315 |
-
sf.write(vl_buf.name, vocals_lead, fs, subtype='FLOAT')
|
316 |
vl_buf.close()
|
317 |
_, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
|
318 |
os.unlink(vl_buf.name)
|
@@ -327,8 +328,13 @@ def smart_mode_process(input_file, api_key):
|
|
327 |
out_R = match_len(FR_vl, length) + match_len(instr[:,1], length)
|
328 |
out_C = match_len(FC_vl, length)
|
329 |
out_LFE = match_len(bass, length)
|
330 |
-
|
331 |
-
|
|
|
|
|
|
|
|
|
|
|
332 |
|
333 |
# Step 7: Encode to 5.1 OGG
|
334 |
p((7,7), "Processing Step 7, Encoding")
|
@@ -365,6 +371,7 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
|
|
365 |
# Smart mode section
|
366 |
with gr.Column(visible=False) as smart_section:
|
367 |
api_key = gr.Textbox(label="MVSep API Key", type="password")
|
|
|
368 |
smart_btn = gr.Button("Start")
|
369 |
smart_out = gr.File(label="Output from Smart Mode")
|
370 |
|
@@ -385,7 +392,7 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
|
|
385 |
|
386 |
# Button functions
|
387 |
btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out], concurrency_limit=10)
|
388 |
-
smart_btn.click(fn=smart_mode_process, inputs=[inp, api_key], outputs=[smart_out], concurrency_limit=20)
|
389 |
|
390 |
if __name__ == "__main__":
|
391 |
demo.launch(show_error=True)
|
|
|
243 |
return audio, sr
|
244 |
|
245 |
# Smart mode workflow
|
246 |
+
def smart_mode_process(input_file, api_key, multi_singer=False):
|
247 |
p = gr.Progress()
|
248 |
import shutil
|
249 |
|
|
|
305 |
api_key, open(other_buf.name, 'rb').read(), os.path.basename(other_buf.name), sep_type=49, output_format=2, addopt1=3, addopt2=1
|
306 |
)
|
307 |
os.unlink(other_buf.name)
|
308 |
+
vocals_full, _ = download_wav(karaoke_resp['files'][0]['url'], target_fs=fs)
|
309 |
vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
|
310 |
vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
|
311 |
instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
|
|
|
313 |
# Step 5: Phantom center for lead vocals
|
314 |
p((5,7), "Distributing Front Vocal Channels")
|
315 |
vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
316 |
+
sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
|
317 |
vl_buf.close()
|
318 |
_, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
|
319 |
os.unlink(vl_buf.name)
|
|
|
328 |
out_R = match_len(FR_vl, length) + match_len(instr[:,1], length)
|
329 |
out_C = match_len(FC_vl, length)
|
330 |
out_LFE = match_len(bass, length)
|
331 |
+
if multi_singer:
|
332 |
+
SL = match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
|
333 |
+
SR = match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
|
334 |
+
else:
|
335 |
+
SL = match_len(vocals_back[:,0], length) + match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
|
336 |
+
SR = match_len(vocals_back[:,1], length) + match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
|
337 |
+
|
338 |
|
339 |
# Step 7: Encode to 5.1 OGG
|
340 |
p((7,7), "Processing Step 7, Encoding")
|
|
|
371 |
# Smart mode section
|
372 |
with gr.Column(visible=False) as smart_section:
|
373 |
api_key = gr.Textbox(label="MVSep API Key", type="password")
|
374 |
+
multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
|
375 |
smart_btn = gr.Button("Start")
|
376 |
smart_out = gr.File(label="Output from Smart Mode")
|
377 |
|
|
|
392 |
|
393 |
# Button functions
|
394 |
btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out], concurrency_limit=10)
|
395 |
+
smart_btn.click(fn=smart_mode_process, inputs=[inp, api_key, multi_singer], outputs=[smart_out], concurrency_limit=20)
|
396 |
|
397 |
if __name__ == "__main__":
|
398 |
demo.launch(show_error=True)
|