# stem-remixer / app.py
# Author: ahk-d
# Commit 250f4e9 (verified): "Update app.py"
import gradio as gr
import torch
import torchaudio
import numpy as np
from pathlib import Path
import tempfile
import os
# Pick the compute device once at import time: the GPU when PyTorch reports
# CUDA as available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
def separate_stems(audio_file, model_name="htdemucs"):
    """Separate an audio file into stems with a pretrained Demucs model.

    Args:
        audio_file: Path of the audio file to process, or ``None`` when
            nothing was uploaded.
        model_name: Name of the pretrained Demucs model to load.

    Returns:
        A 5-tuple ``(stem_1, stem_2, stem_3, stem_4, status)``. The first
        four entries are paths to WAV files for the model's first four
        sources, in the order given by ``model.sources`` (a slot is ``None``
        when the model exposes fewer than four sources). ``status`` is a
        human-readable message. On any failure the four stem slots are
        ``None`` and ``status`` describes the error; no exception escapes.
    """
    if audio_file is None:
        return None, None, None, None, "❌ Please upload an audio file"

    try:
        # Imported lazily so a missing/broken Demucs install is reported in
        # the status message instead of failing at module import.
        from demucs.pretrained import get_model
        from demucs.apply import apply_model
        from demucs.audio import save_audio

        # The model is loaded on every call and put in inference mode.
        model = get_model(model_name)
        model.to(device)
        model.eval()

        wav, sr = torchaudio.load(audio_file)

        # Force exactly two channels: duplicate mono, drop extras.
        channels = wav.shape[0]
        if channels == 1:
            wav = wav.repeat(2, 1)
        elif channels > 2:
            wav = wav[:2]

        # Match the sample rate the model reports.
        if sr != model.samplerate:
            wav = torchaudio.transforms.Resample(sr, model.samplerate)(wav)
            sr = model.samplerate

        wav = wav.to(device)

        # Add a leading batch dimension for the model; no gradients needed.
        with torch.no_grad():
            sources = apply_model(model, wav.unsqueeze(0))

        source_names = model.sources

        # The files must outlive this function because the caller reads them
        # after we return. A TemporaryDirectory context would delete them on
        # exit, so use mkdtemp(), which leaves cleanup to the caller / OS.
        output_dir = tempfile.mkdtemp(prefix="stems_")
        output_files = {}
        for index, source in enumerate(source_names):
            output_path = os.path.join(output_dir, f"{source}.wav")
            # Move the stem to host memory before writing it to disk; the
            # tensor lives on `device`, which may be a GPU.
            save_audio(sources[0, index].cpu(), output_path, sr)
            output_files[source] = output_path

        # The UI has four players, so only the first four sources are
        # returned (presumably drums, bass, other, vocals for the offered
        # models -- the order is whatever model.sources provides).
        stems = [output_files[source] for source in source_names[:4]]
        stems += [None] * (4 - len(stems))

        status_msg = f"✅ Successfully separated into {len(source_names)} stems"
        return tuple(stems + [status_msg])

    except Exception as e:
        # UI boundary: surface every failure as a status message rather than
        # raising.
        error_msg = f"❌ Error during separation: {str(e)}"
        return None, None, None, None, error_msg
def create_hf_interface():
    """Build the Gradio Blocks UI for the stem separator.

    The page contains an audio upload, a model dropdown and a trigger button,
    an informational panel, a read-only status box and four read-only audio
    players. Clicking the button calls ``separate_stems`` with the uploaded
    file path and the selected model, and routes its five return values to
    the drums, bass, other and vocals players and the status box, in that
    order.

    Returns:
        The constructed ``gr.Blocks`` object. It is not launched here.
    """
    # Function-scope import keeps this block self-contained.
    from inspect import cleandoc

    def markdown(text):
        """Add a Markdown component after stripping the code indentation."""
        # Without dedenting, text indented by four or more spaces would be
        # rendered as a code block.
        return gr.Markdown(cleandoc(text))

    # NOTE(review): the nesting of rows/columns below is reconstructed from a
    # flattened copy of the file -- confirm against the deployed layout.
    with gr.Blocks(
        title="🎵 Music Stem Separator",
        theme=gr.themes.Soft(),
    ) as interface:
        markdown("""
            # 🎵 Music Stem Separator

            Separate music into individual stems using **Meta's Demucs** model.
            Upload an audio file and get separated tracks for **drums**, **bass**, **other instruments**, and **vocals**.

            ⚡ **Powered by Demucs** - State-of-the-art source separation
            """)

        with gr.Row():
            with gr.Column():
                # Input; type="filepath" hands the handler a path string.
                audio_input = gr.Audio(
                    type="filepath",
                    label="🎼 Upload Music File",
                )
                # Each choice is a (label shown, value passed to handler) pair.
                model_choice = gr.Dropdown(
                    choices=[
                        ("HTDemucs (4 stems)", "htdemucs"),
                        ("HTDemucs FT (4 stems)", "htdemucs_ft"),
                        ("MDX Extra (4 stems)", "mdx_extra"),
                    ],
                    value="htdemucs",
                    label="🤖 Model",
                )
                separate_btn = gr.Button(
                    "🎛️ Separate Stems",
                    variant="primary",
                )

            with gr.Column():
                markdown("""
                    ### ℹ️ About Stem Separation

                    **What you'll get:**

                    - 🥁 **Drums**: Percussion and rhythm
                    - 🎸 **Bass**: Bass lines and low frequencies
                    - 🎹 **Other**: Instruments, synths, effects
                    - 🎤 **Vocals**: Lead and backing vocals

                    **Tips:**

                    - Higher quality input = better separation
                    - Processing takes 1-3 minutes depending on length
                    - Works best with modern pop/rock music
                    """)

        # Status
        status_output = gr.Textbox(
            label="Status",
            interactive=False,
        )

        # Output stems
        markdown("### 🎶 Separated Stems")

        with gr.Row():
            drums_output = gr.Audio(
                label="🥁 Drums",
                interactive=False,
            )
            bass_output = gr.Audio(
                label="🎸 Bass",
                interactive=False,
            )

        with gr.Row():
            other_output = gr.Audio(
                label="🎹 Other",
                interactive=False,
            )
            vocals_output = gr.Audio(
                label="🎤 Vocals",
                interactive=False,
            )

        # Connect the interface. The order of `outputs` must match the order
        # of the tuple returned by separate_stems.
        separate_btn.click(
            fn=separate_stems,
            inputs=[audio_input, model_choice],
            outputs=[
                drums_output,
                bass_output,
                other_output,
                vocals_output,
                status_output,
            ],
        )

        # Usage instructions (static text; no example files are bundled).
        markdown("""
            ### 🎵 Instructions

            1. Upload an audio file (MP3, WAV, FLAC)
            2. Choose a separation model
            3. Click "Separate Stems"
            4. Download individual tracks when ready

            **Supported formats**: MP3, WAV, FLAC (max 50MB)

            **Processing time**: 1-5 minutes depending on file length
            """)

        markdown("""
            ---

            **Note**: This space uses Meta's Demucs for stem separation. Processing time depends on audio length and available compute resources.

            **Limitations**:

            - Max file size: ~50MB
            - Processing time: 1-5 minutes
            - Works best with clear, well-produced music
            """)

    return interface
# Create and launch the interface
# Runs only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    # Build the Blocks UI first, then call launch() with no arguments.
    demo = create_hf_interface()
    demo.launch()