import gradio as gr
import torch
import torchaudio
import numpy as np
from pathlib import Path
import tempfile
import os
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


def separate_stems(audio_file, model_name="htdemucs"):
    """
    Separate audio stems using Demucs.
    """
    if audio_file is None:
        return None, None, None, None, "❌ Please upload an audio file"
    try:
        # Import demucs modules
        from demucs.pretrained import get_model
        from demucs.apply import apply_model
        from demucs.audio import save_audio

        # Load the model
        model = get_model(model_name)
        model.to(device)
        model.eval()

        # Load audio
        wav, sr = torchaudio.load(audio_file)
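        # wav is a float tensor shaped (channels, samples); sr is the file's
        # native sample rate.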
        # Ensure stereo
        if wav.shape[0] == 1:
            wav = wav.repeat(2, 1)
        elif wav.shape[0] > 2:
            wav = wav[:2]

        # Resample if necessary
        if sr != model.samplerate:
            resampler = torchaudio.transforms.Resample(sr, model.samplerate)
            wav = resampler(wav)
            sr = model.samplerate

        # Move to device
        wav = wav.to(device)

        # Apply the model
        with torch.no_grad():
            sources = apply_model(model, wav.unsqueeze(0))
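        # sources is shaped (batch, sources, channels, samples); sources[0, i]
        # below selects stem i from our single batch item.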
        # Get source names
        source_names = model.sources

        # Save separated sources. Use mkdtemp() rather than a
        # TemporaryDirectory context manager: the files must outlive this
        # function so Gradio can still serve them after we return.
        temp_dir = tempfile.mkdtemp()
        output_files = {}
        for i, source in enumerate(source_names):
            output_path = os.path.join(temp_dir, f"{source}.wav")
            save_audio(sources[0, i].cpu(), output_path, sr)
            output_files[source] = output_path

        # Return the separated stems (assuming 4 stems: drums, bass, other, vocals)
        stems = [None] * 4
        status_msg = f"✅ Successfully separated into {len(source_names)} stems"
        for i, source in enumerate(source_names[:4]):  # Limit to 4 for the UI
            if source in output_files:
                stems[i] = output_files[source]
        return tuple(stems + [status_msg])
    except Exception as e:
        error_msg = f"❌ Error during separation: {str(e)}"
        return None, None, None, None, error_msg
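
# A minimal sketch of calling the separator directly, outside the Gradio UI
# ("song.wav" is a hypothetical file name; assumes demucs is installed):
#
#   drums, bass, other, vocals, status = separate_stems("song.wav", "htdemucs")
#   print(status)  # e.g. "✅ Successfully separated into 4 stems"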


def create_hf_interface():
    """Create a Hugging Face Spaces compatible interface."""
    with gr.Blocks(
        title="🎵 Music Stem Separator",
        theme=gr.themes.Soft(),
    ) as interface:
        gr.Markdown("""
        # 🎵 Music Stem Separator

        Separate music into individual stems using **Meta's Demucs** model.
        Upload an audio file and get separated tracks for **drums**, **bass**, **other instruments**, and **vocals**.

        ⚡ **Powered by Demucs** - state-of-the-art source separation
        """)

        with gr.Row():
            with gr.Column():
                # Input
                audio_input = gr.Audio(
                    type="filepath",
                    label="🎼 Upload Music File"
                )
                model_choice = gr.Dropdown(
                    choices=[
                        ("HTDemucs (4 stems)", "htdemucs"),
                        ("HTDemucs FT (4 stems)", "htdemucs_ft"),
                        ("MDX Extra (4 stems)", "mdx_extra")
                    ],
                    value="htdemucs",
                    label="🤖 Model"
                )
                separate_btn = gr.Button(
                    "🎛️ Separate Stems",
                    variant="primary"
                )
            with gr.Column():
                gr.Markdown("""
                ### ℹ️ About Stem Separation

                **What you'll get:**
                - 🥁 **Drums**: Percussion and rhythm
                - 🎸 **Bass**: Bass lines and low frequencies
                - 🎹 **Other**: Instruments, synths, effects
                - 🎤 **Vocals**: Lead and backing vocals

                **Tips:**
                - Higher-quality input = better separation
                - Processing takes 1-5 minutes depending on length
                - Works best with modern pop/rock music
                """)

        # Status
        status_output = gr.Textbox(
            label="Status",
            interactive=False
        )
        # Output stems
        gr.Markdown("### 🎶 Separated Stems")
        with gr.Row():
            drums_output = gr.Audio(
                label="🥁 Drums",
                interactive=False
            )
            bass_output = gr.Audio(
                label="🎸 Bass",
                interactive=False
            )
        with gr.Row():
            other_output = gr.Audio(
                label="🎹 Other",
                interactive=False
            )
            vocals_output = gr.Audio(
                label="🎤 Vocals",
                interactive=False
            )
        # Connect the interface
        separate_btn.click(
            fn=separate_stems,
            inputs=[audio_input, model_choice],
            outputs=[
                drums_output,
                bass_output,
                other_output,
                vocals_output,
                status_output
            ]
        )
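        # Gradio assigns the function's five return values to these output
        # components positionally, in the order listed above.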
        # Examples - simplified for older Gradio versions
        gr.Markdown("""
        ### 🎵 Instructions
        1. Upload an audio file (MP3, WAV, FLAC)
        2. Choose a separation model
        3. Click "Separate Stems"
        4. Download the individual tracks when ready

        **Supported formats**: MP3, WAV, FLAC (max 50MB)
        **Processing time**: 1-5 minutes depending on file length
        """)

        gr.Markdown("""
        ---
        **Note**: This Space uses Meta's Demucs for stem separation. Processing time depends on audio length and available compute resources.

        **Limitations**:
        - Max file size: ~50MB
        - Processing time: 1-5 minutes
        - Works best with clear, well-produced music
        """)

    return interface
# Create and launch the interface
if __name__ == "__main__":
    demo = create_hf_interface()
    demo.launch()
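    # For long-running separations on Spaces, enabling Gradio's request queue
    # can help, e.g. `demo.queue().launch()` (a sketch; tune concurrency to
    # your hardware).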