Spaces:

ziqiangao
/

surroundify

Running

App Files Community

ziqiangao committed 9 days ago

Commit

0a346a3

1 Parent(s): 938bd0e

Add Multi Singer mode, since the lead/back vocal model just uses the center

Browse files

Files changed (1) hide show

app.py +12 -5

app.py CHANGED Viewed

@@ -243,7 +243,7 @@ def download_wav(url, target_fs=None):
     return audio, sr
 # Smart mode workflow
-def smart_mode_process(input_file, api_key):
     p = gr.Progress()
     import shutil
@@ -305,6 +305,7 @@ def smart_mode_process(input_file, api_key):
         api_key, open(other_buf.name, 'rb').read(), os.path.basename(other_buf.name), sep_type=49, output_format=2, addopt1=3, addopt2=1
     )
     os.unlink(other_buf.name)
     vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
     vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
     instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
@@ -312,7 +313,7 @@ def smart_mode_process(input_file, api_key):
     # Step 5: Phantom center for lead vocals
     p((5,7), "Distributing Front Vocal Channels")
     vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-    sf.write(vl_buf.name, vocals_lead, fs, subtype='FLOAT')
     vl_buf.close()
     _, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
     os.unlink(vl_buf.name)
@@ -327,8 +328,13 @@ def smart_mode_process(input_file, api_key):
     out_R = match_len(FR_vl, length) + match_len(instr[:,1], length)
     out_C = match_len(FC_vl, length)
     out_LFE = match_len(bass, length)
-    SL = match_len(vocals_back[:,0], length) + match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
-    SR = match_len(vocals_back[:,1], length) + match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
     # Step 7: Encode to 5.1 OGG
     p((7,7), "Processing Step 7, Encoding")
@@ -365,6 +371,7 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
     # Smart mode section
     with gr.Column(visible=False) as smart_section:
         api_key = gr.Textbox(label="MVSep API Key", type="password")
         smart_btn = gr.Button("Start")
         smart_out = gr.File(label="Output from Smart Mode")
@@ -385,7 +392,7 @@ with gr.Blocks(title="Stereo to 5.1 Surround") as demo:
     # Button functions
     btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out], concurrency_limit=10)
-    smart_btn.click(fn=smart_mode_process, inputs=[inp, api_key], outputs=[smart_out], concurrency_limit=20)
 if __name__ == "__main__":
     demo.launch(show_error=True)

     return audio, sr
 # Smart mode workflow
+def smart_mode_process(input_file, api_key, multi_singer=False):
     p = gr.Progress()
     import shutil
         api_key, open(other_buf.name, 'rb').read(), os.path.basename(other_buf.name), sep_type=49, output_format=2, addopt1=3, addopt2=1
     )
     os.unlink(other_buf.name)
+    vocals_full, _ = download_wav(karaoke_resp['files'][0]['url'], target_fs=fs)
     vocals_lead, _ = download_wav(karaoke_resp['files'][1]['url'], target_fs=fs)
     vocals_back, _ = download_wav(karaoke_resp['files'][2]['url'], target_fs=fs)
     instr, _ = download_wav(karaoke_resp['files'][3]['url'], target_fs=fs)
     # Step 5: Phantom center for lead vocals
     p((5,7), "Distributing Front Vocal Channels")
     vl_buf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    sf.write(vl_buf.name, vocals_full if multi_singer else vocals_lead, fs, subtype='FLOAT')
     vl_buf.close()
     _, FL_vl, FR_vl, FC_vl = extract_phantom_center(vl_buf.name)
     os.unlink(vl_buf.name)
     out_R = match_len(FR_vl, length) + match_len(instr[:,1], length)
     out_C = match_len(FC_vl, length)
     out_LFE = match_len(bass, length)
+    if multi_singer:
+        SL = match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
+        SR = match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
+    else:
+        SL = match_len(vocals_back[:,0], length) + match_len(sfx[:,0], length) + match_len(crowd[:,0], length)
+        SR = match_len(vocals_back[:,1], length) + match_len(sfx[:,1], length) + match_len(crowd[:,1], length)
     # Step 7: Encode to 5.1 OGG
     p((7,7), "Processing Step 7, Encoding")
     # Smart mode section
     with gr.Column(visible=False) as smart_section:
         api_key = gr.Textbox(label="MVSep API Key", type="password")
+        multi_singer = gr.Checkbox(label="Multi Singer Mode", value=False)
         smart_btn = gr.Button("Start")
         smart_out = gr.File(label="Output from Smart Mode")
     # Button functions
     btn.click(fn=create_5_1_surround, inputs=[inp, preset], outputs=[out], concurrency_limit=10)
+    smart_btn.click(fn=smart_mode_process, inputs=[inp, api_key, multi_singer], outputs=[smart_out], concurrency_limit=20)
 if __name__ == "__main__":
     demo.launch(show_error=True)