import os
import tempfile

import gradio as gr
import torch
import torchaudio

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def separate_stems(audio_file, model_name="htdemucs"):
    """
    Separate audio stems using Demucs.
    """
    if audio_file is None:
        return None, None, None, None, "❌ Please upload an audio file"
    try:
        # Import Demucs lazily so a missing dependency surfaces as a UI
        # error message instead of crashing the app at startup
        from demucs.pretrained import get_model
        from demucs.apply import apply_model
        from demucs.audio import save_audio

        # Load the pretrained model
        model = get_model(model_name)
        model.to(device)
        model.eval()

        # Load audio
        wav, sr = torchaudio.load(audio_file)

        # Ensure stereo: duplicate a mono channel, keep only the first two
        if wav.shape[0] == 1:
            wav = wav.repeat(2, 1)
        elif wav.shape[0] > 2:
            wav = wav[:2]

        # Resample to the model's expected sample rate if necessary
        if sr != model.samplerate:
            resampler = torchaudio.transforms.Resample(sr, model.samplerate)
            wav = resampler(wav)
            sr = model.samplerate

        # Move to device
        wav = wav.to(device)

        # Apply the model (unsqueeze adds the batch dimension)
        with torch.no_grad():
            sources = apply_model(model, wav.unsqueeze(0), device=device)
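
        # The Demucs CLI additionally normalizes the mix by its own mean/std
        # before apply_model and de-normalizes the outputs afterwards; a
        # minimal sketch of that optional step (not applied here) would be:
        #   ref = wav.mean(0)
        #   wav = (wav - ref.mean()) / ref.std()
        #   sources = apply_model(model, wav.unsqueeze(0), device=device)
        #   sources = sources * ref.std() + ref.mean()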

        # Get the source names (e.g. ["drums", "bass", "other", "vocals"])
        source_names = model.sources

        # Save separated sources. Use mkdtemp rather than a
        # TemporaryDirectory context manager: Gradio reads these files
        # after we return, so they must outlive this function
        temp_dir = tempfile.mkdtemp()
        output_files = {}
        for i, source in enumerate(source_names):
            output_path = os.path.join(temp_dir, f"{source}.wav")
            # Move the tensor to CPU before writing to disk
            save_audio(sources[0, i].cpu(), output_path, sr)
            output_files[source] = output_path

        # Return the separated stems (assuming 4 stems: drums, bass, other, vocals)
        stems = [None] * 4
        status_msg = f"✅ Successfully separated into {len(source_names)} stems"
        for i, source in enumerate(source_names[:4]):  # Limit to 4 for the UI
            if source in output_files:
                stems[i] = output_files[source]

        return tuple(stems + [status_msg])

    except Exception as e:
        error_msg = f"❌ Error during separation: {str(e)}"
        return None, None, None, None, error_msg
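

# Example local test (hypothetical file name):
#   drums, bass, other, vocals, status = separate_stems("song.mp3")
#   print(status)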


def create_hf_interface():
    """Create a Hugging Face Spaces compatible interface."""
    with gr.Blocks(
        title="🎵 Music Stem Separator",
        theme=gr.themes.Soft(),
    ) as interface:
        gr.Markdown("""
        # 🎵 Music Stem Separator

        Separate music into individual stems using **Meta's Demucs** model.
        Upload an audio file and get separated tracks for **drums**, **bass**, **other instruments**, and **vocals**.

        ⚡ **Powered by Demucs** - State-of-the-art source separation
        """)

        with gr.Row():
            with gr.Column():
                # Input
                audio_input = gr.Audio(
                    type="filepath",
                    label="🎼 Upload Music File"
                )
                model_choice = gr.Dropdown(
                    choices=[
                        ("HTDemucs (4 stems)", "htdemucs"),
                        ("HTDemucs FT (4 stems)", "htdemucs_ft"),
                        ("MDX Extra (4 stems)", "mdx_extra")
                    ],
                    value="htdemucs",
                    label="🤖 Model"
                )
                separate_btn = gr.Button(
                    "🎛️ Separate Stems",
                    variant="primary"
                )
            with gr.Column():
                gr.Markdown("""
                ### ℹ️ About Stem Separation

                **What you'll get:**
                - 🥁 **Drums**: Percussion and rhythm
                - 🎸 **Bass**: Bass lines and low frequencies
                - 🎹 **Other**: Instruments, synths, effects
                - 🎤 **Vocals**: Lead and backing vocals

                **Tips:**
                - Higher-quality input = better separation
                - Processing takes 1-5 minutes depending on length
                - Works best with modern pop/rock music
                """)

        # Status
        status_output = gr.Textbox(
            label="Status",
            interactive=False
        )

        # Output stems
        gr.Markdown("### 🎶 Separated Stems")
        with gr.Row():
            drums_output = gr.Audio(
                label="🥁 Drums",
                interactive=False
            )
            bass_output = gr.Audio(
                label="🎸 Bass",
                interactive=False
            )
        with gr.Row():
            other_output = gr.Audio(
                label="🎹 Other",
                interactive=False
            )
            vocals_output = gr.Audio(
                label="🎤 Vocals",
                interactive=False
            )

        # Connect the interface
        separate_btn.click(
            fn=separate_stems,
            inputs=[audio_input, model_choice],
            outputs=[
                drums_output,
                bass_output,
                other_output,
                vocals_output,
                status_output
            ]
        )
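        # Note: the order of `outputs` above must match the tuple returned
        # by separate_stems: (drums, bass, other, vocals, status message)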

        # Examples - simplified for older Gradio versions
        gr.Markdown("""
        ### 🎵 Instructions

        1. Upload an audio file (MP3, WAV, FLAC)
        2. Choose a separation model
        3. Click "Separate Stems"
        4. Download the individual tracks when ready

        **Supported formats**: MP3, WAV, FLAC (max ~50MB)
        **Processing time**: 1-5 minutes depending on file length
        """)

        gr.Markdown("""
        ---
        **Note**: This Space uses Meta's Demucs for stem separation. Processing time depends on audio length and available compute resources.

        **Limitations**:
        - Max file size: ~50MB
        - Processing time: 1-5 minutes
        - Works best with clear, well-produced music
        """)

    return interface


# Create and launch the interface
if __name__ == "__main__":
    demo = create_hf_interface()
    demo.launch()
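
# A minimal requirements.txt for this Space would list the packages
# imported above (exact version pins are an assumption, not specified here):
#   gradio
#   torch
#   torchaudio
#   demucs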