Update gradio_app.py

gradio_app.py CHANGED (+6 -15)
@@ -102,6 +102,8 @@ def separate_dnr_video(video_path):
 
     return dialog_video, effect_video, music_video
 
+
+
 @spaces.GPU()
 def separate_speakers_video(video_path):
     audio_path, video = extract_audio_from_video(video_path)
@@ -123,29 +125,18 @@ def separate_speakers_video(video_path):
 
     output_files = []
     for i in range(ests_speech.shape[0]):
-
-
-        # Ensure shape is [samples, channels]
-        if audio_np.ndim == 1:
-            audio_np = audio_np[:, None]
+        separated_audio_path = os.path.join(output_dir, f"speaker_{i+1}.wav")
+        torchaudio.save(separated_audio_path, ests_speech[i].unsqueeze(0).cpu(), TARGET_SR)
 
-
-        separated_audio_path = os.path.join(output_dir, audio_filename)
-
-        # Explicitly set format/subtype
-        sf.write(separated_audio_path, audio_np, TARGET_SR, format='WAV', subtype='PCM_16')
-
-        # Attach to video
+        # Attach audio back to video
         out_video_path = os.path.join(output_dir, f"speaker_{i+1}.mp4")
         attach_audio_to_video(video, separated_audio_path, out_video_path)
         output_files.append(out_video_path)
 
-    # Return only existing video files
     return output_files + [None] * (MAX_SPEAKERS - len(output_files))
 
 
 
-
 # --- Gradio UI ---
 with gr.Blocks() as demo:
     gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")
@@ -197,4 +188,4 @@ with gr.Blocks() as demo:
     vsep_btn.click(separate_speakers_video, inputs=vsep_input, outputs=vsep_outputs)
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(ssr_mode=False)
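The substantive change is in the per-speaker loop: the old numpy/soundfile path (manual [samples, channels] reshaping plus sf.write with an explicit PCM_16 subtype) is replaced by a single torchaudio.save call on each separated tensor, which also removes the reference to the undefined audio_filename variable. A minimal sketch of the new loop in isolation, assuming ests_speech is a [num_speakers, num_samples] float tensor of separated waveforms and TARGET_SR is the sample rate that gradio_app.py defines elsewhere:

    import os
    import torch
    import torchaudio

    TARGET_SR = 16000  # assumed value; gradio_app.py defines its own TARGET_SR

    def save_separated_tracks(ests_speech: torch.Tensor, output_dir: str) -> list:
        # Write each separated source as a mono WAV file.
        output_files = []
        for i in range(ests_speech.shape[0]):
            separated_audio_path = os.path.join(output_dir, f"speaker_{i+1}.wav")
            # torchaudio.save expects a [channels, samples] tensor on the CPU,
            # hence unsqueeze(0) for the channel axis and .cpu() to leave the GPU.
            torchaudio.save(separated_audio_path, ests_speech[i].unsqueeze(0).cpu(), TARGET_SR)
            output_files.append(separated_audio_path)
        return output_files

In the app itself, each WAV is then muxed back onto the original video by the app's attach_audio_to_video helper, and the result list is padded with None up to MAX_SPEAKERS so every Gradio output slot receives a value.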
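The only other functional change is demo.launch(ssr_mode=False). In Gradio 5, launch() accepts an ssr_mode flag that controls server-side rendering of the frontend; setting it to False forces client-side rendering, a common workaround for SSR-related rendering problems on Spaces.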