import gradio as gr
import torch
import torchaudio
import numpy as np
from pathlib import Path
import tempfile
import os
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


def separate_stems(audio_file, model_name="htdemucs"):
    """
    Separate audio stems using Demucs.
    """
    if audio_file is None:
        return None, None, None, None, "❌ Please upload an audio file"
    try:
        # Import demucs modules
        from demucs.pretrained import get_model
        from demucs.apply import apply_model
        from demucs.audio import save_audio

        # Load the model
        model = get_model(model_name)
        model.to(device)
        model.eval()

        # Load audio
        wav, sr = torchaudio.load(audio_file)
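        # wav is a float tensor shaped (channels, samples); sr is the file's
        # native sample rate.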
        # Ensure stereo
        if wav.shape[0] == 1:
            wav = wav.repeat(2, 1)
        elif wav.shape[0] > 2:
            wav = wav[:2]

        # Resample if necessary
        if sr != model.samplerate:
            resampler = torchaudio.transforms.Resample(sr, model.samplerate)
            wav = resampler(wav)
            sr = model.samplerate

        # Move to device
        wav = wav.to(device)

        # Apply the model
        with torch.no_grad():
            sources = apply_model(model, wav.unsqueeze(0))
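        # sources is shaped (batch, sources, channels, samples); sources[0, i]
        # below selects stem i from our single batch item.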
        # Get source names
        source_names = model.sources

        # Save separated sources. Use mkdtemp() rather than a
        # TemporaryDirectory context manager: the files must outlive this
        # function so Gradio can still serve them after we return.
        temp_dir = tempfile.mkdtemp()
        output_files = {}
        for i, source in enumerate(source_names):
            output_path = os.path.join(temp_dir, f"{source}.wav")
            save_audio(sources[0, i].cpu(), output_path, sr)
            output_files[source] = output_path

        # Return the separated stems (assuming 4 stems: drums, bass, other, vocals)
        stems = [None] * 4
        status_msg = f"✅ Successfully separated into {len(source_names)} stems"
        for i, source in enumerate(source_names[:4]):  # Limit to 4 for the UI
            if source in output_files:
                stems[i] = output_files[source]
        return tuple(stems + [status_msg])
    except Exception as e:
        error_msg = f"❌ Error during separation: {str(e)}"
        return None, None, None, None, error_msg
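
# A minimal sketch of calling the separator directly, outside the Gradio UI
# ("song.wav" is a hypothetical file name; assumes demucs is installed):
#
#   drums, bass, other, vocals, status = separate_stems("song.wav", "htdemucs")
#   print(status)  # e.g. "✅ Successfully separated into 4 stems"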


def create_hf_interface():
    """Create a Hugging Face Spaces compatible interface."""
    with gr.Blocks(
        title="🎵 Music Stem Separator",
        theme=gr.themes.Soft(),
    ) as interface:
        gr.Markdown("""
        # 🎵 Music Stem Separator

        Separate music into individual stems using **Meta's Demucs** model.
        Upload an audio file and get separated tracks for **drums**, **bass**, **other instruments**, and **vocals**.

        ⚡ **Powered by Demucs** - state-of-the-art source separation
        """)

        with gr.Row():
            with gr.Column():
                # Input
                audio_input = gr.Audio(
                    type="filepath",
                    label="🎼 Upload Music File"
                )
                model_choice = gr.Dropdown(
                    choices=[
                        ("HTDemucs (4 stems)", "htdemucs"),
                        ("HTDemucs FT (4 stems)", "htdemucs_ft"),
                        ("MDX Extra (4 stems)", "mdx_extra")
                    ],
                    value="htdemucs",
                    label="🤖 Model"
                )
                separate_btn = gr.Button(
                    "🎛️ Separate Stems",
                    variant="primary"
                )
            with gr.Column():
                gr.Markdown("""
                ### ℹ️ About Stem Separation

                **What you'll get:**
                - 🥁 **Drums**: Percussion and rhythm
                - 🎸 **Bass**: Bass lines and low frequencies
                - 🎹 **Other**: Instruments, synths, effects
                - 🎤 **Vocals**: Lead and backing vocals

                **Tips:**
                - Higher-quality input = better separation
                - Processing takes 1-5 minutes depending on length
                - Works best with modern pop/rock music
                """)

        # Status
        status_output = gr.Textbox(
            label="Status",
            interactive=False
        )
        # Output stems
        gr.Markdown("### 🎶 Separated Stems")
        with gr.Row():
            drums_output = gr.Audio(
                label="🥁 Drums",
                interactive=False
            )
            bass_output = gr.Audio(
                label="🎸 Bass",
                interactive=False
            )
        with gr.Row():
            other_output = gr.Audio(
                label="🎹 Other",
                interactive=False
            )
            vocals_output = gr.Audio(
                label="🎤 Vocals",
                interactive=False
            )
        # Connect the interface
        separate_btn.click(
            fn=separate_stems,
            inputs=[audio_input, model_choice],
            outputs=[
                drums_output,
                bass_output,
                other_output,
                vocals_output,
                status_output
            ]
        )
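        # Gradio assigns the function's five return values to these output
        # components positionally, in the order listed above.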
        # Examples - simplified for older Gradio versions
        gr.Markdown("""
        ### 🎵 Instructions
        1. Upload an audio file (MP3, WAV, FLAC)
        2. Choose a separation model
        3. Click "Separate Stems"
        4. Download the individual tracks when ready

        **Supported formats**: MP3, WAV, FLAC (max 50MB)
        **Processing time**: 1-5 minutes depending on file length
        """)

        gr.Markdown("""
        ---
        **Note**: This Space uses Meta's Demucs for stem separation. Processing time depends on audio length and available compute resources.

        **Limitations**:
        - Max file size: ~50MB
        - Processing time: 1-5 minutes
        - Works best with clear, well-produced music
        """)

    return interface
# Create and launch the interface
if __name__ == "__main__":
    demo = create_hf_interface()
    demo.launch()
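    # For long-running separations on Spaces, enabling Gradio's request queue
    # can help, e.g. `demo.queue().launch()` (a sketch; tune concurrency to
    # your hardware).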