"""Gradio app that splits a music file into stems with Meta's Demucs."""

import functools
import logging
import os
import tempfile
from pathlib import Path

import gradio as gr
import numpy as np
import torch
import torchaudio

logger = logging.getLogger(__name__)

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Stem names in the order of the four audio outputs wired up in the UI.
STEM_ORDER = ("drums", "bass", "other", "vocals")


@functools.lru_cache(maxsize=1)
def _load_model(model_name):
    """Load a pretrained Demucs model, move it to ``device`` and set eval mode.

    Only the most recently requested model is kept, so repeated separations
    with the same model skip the reload while memory use stays bounded.
    """
    # Imported lazily so a missing/broken demucs install surfaces as a
    # status message in the UI instead of crashing at import time.
    from demucs.pretrained import get_model

    model = get_model(model_name)
    model.to(device)
    model.eval()
    return model


def _prepare_waveform(wav, sr, target_sr):
    """Return ``(wav, sr)`` with exactly two channels at ``target_sr``.

    The first dimension of ``wav`` is treated as the channel axis: a single
    channel is duplicated, and anything beyond the first two is dropped.
    """
    if wav.shape[0] == 1:
        wav = wav.repeat(2, 1)
    elif wav.shape[0] > 2:
        wav = wav[:2]

    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(sr, target_sr)
        wav = resampler(wav)
        sr = target_sr

    return wav, sr


def separate_stems(audio_file, model_name="htdemucs"):
    """Separate an audio file into stems using Demucs.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when
            nothing was uploaded.
        model_name: Name of the pretrained Demucs model to use.

    Returns:
        A 5-tuple ``(drums, bass, other, vocals, status)``. The first four
        items are paths to WAV files (``None`` for a stem the model did not
        produce, or for every stem on failure); ``status`` is a
        human-readable message.
    """
    if audio_file is None:
        return None, None, None, None, "❌ Please upload an audio file"

    try:
        # Lazy imports: see _load_model.
        from demucs.apply import apply_model
        from demucs.audio import save_audio

        model = _load_model(model_name)

        wav, sr = torchaudio.load(audio_file)
        wav, sr = _prepare_waveform(wav, sr, model.samplerate)
        wav = wav.to(device)

        with torch.no_grad():
            sources = apply_model(model, wav.unsqueeze(0))

        source_names = model.sources

        # The files must still exist after this function returns, because
        # the UI reads them from the returned paths. A TemporaryDirectory
        # context manager would delete them on exit, so a persistent
        # directory is created instead.
        output_dir = tempfile.mkdtemp(prefix="stems_")
        output_files = {}
        for index, source in enumerate(source_names):
            output_path = os.path.join(output_dir, f"{source}.wav")
            # Move to CPU before writing so saving does not depend on the
            # audio backend accepting tensors from the compute device.
            save_audio(sources[0, index].cpu(), output_path, sr)
            output_files[source] = output_path

        # Match stems to UI slots by name rather than by position, so each
        # player always receives the stem its label promises.
        stems = [output_files.get(name) for name in STEM_ORDER]
        status_msg = f"✅ Successfully separated into {len(source_names)} stems"
        return (*stems, status_msg)

    except Exception as e:
        # Top-level UI boundary: keep the traceback in the logs and show a
        # short message to the user instead of raising.
        logger.exception("Stem separation failed")
        error_msg = f"❌ Error during separation: {str(e)}"
        return None, None, None, None, error_msg


def create_hf_interface():
    """Create Hugging Face Spaces compatible interface.

    Returns:
        The ``gr.Blocks`` app, with the separate button wired to
        ``separate_stems``.
    """
    with gr.Blocks(
        title="🎵 Music Stem Separator",
        theme=gr.themes.Soft(),
    ) as interface:

        gr.Markdown("""
        # 🎵 Music Stem Separator

        Separate music into individual stems using **Meta's Demucs** model.
        Upload an audio file and get separated tracks for **drums**, **bass**, **other instruments**, and **vocals**.

        ⚡ **Powered by Demucs** - State-of-the-art source separation
        """)

        with gr.Row():
            with gr.Column():
                # Input
                audio_input = gr.Audio(
                    type="filepath",
                    label="🎼 Upload Music File",
                )

                model_choice = gr.Dropdown(
                    choices=[
                        ("HTDemucs (4 stems)", "htdemucs"),
                        ("HTDemucs FT (4 stems)", "htdemucs_ft"),
                        ("MDX Extra (4 stems)", "mdx_extra"),
                    ],
                    value="htdemucs",
                    label="🤖 Model",
                )

                separate_btn = gr.Button(
                    "🎛️ Separate Stems",
                    variant="primary",
                )

            with gr.Column():
                gr.Markdown("""
                ### ℹ️ About Stem Separation

                **What you'll get:**
                - 🥁 **Drums**: Percussion and rhythm
                - 🎸 **Bass**: Bass lines and low frequencies
                - 🎹 **Other**: Instruments, synths, effects
                - 🎤 **Vocals**: Lead and backing vocals

                **Tips:**
                - Higher quality input = better separation
                - Processing takes 1-3 minutes depending on length
                - Works best with modern pop/rock music
                """)

        # Status
        status_output = gr.Textbox(
            label="Status",
            interactive=False,
        )

        # Output stems
        gr.Markdown("### 🎶 Separated Stems")

        with gr.Row():
            drums_output = gr.Audio(
                label="🥁 Drums",
                interactive=False,
            )
            bass_output = gr.Audio(
                label="🎸 Bass",
                interactive=False,
            )

        with gr.Row():
            other_output = gr.Audio(
                label="🎹 Other",
                interactive=False,
            )
            vocals_output = gr.Audio(
                label="🎤 Vocals",
                interactive=False,
            )

        # Connect the interface. The output order must match STEM_ORDER
        # followed by the status message.
        separate_btn.click(
            fn=separate_stems,
            inputs=[audio_input, model_choice],
            outputs=[
                drums_output,
                bass_output,
                other_output,
                vocals_output,
                status_output,
            ],
        )

        # Examples - simplified for older Gradio
        gr.Markdown("""
        ### 🎵 Instructions
        1. Upload an audio file (MP3, WAV, FLAC)
        2. Choose a separation model
        3. Click "Separate Stems"
        4. Download individual tracks when ready

        **Supported formats**: MP3, WAV, FLAC (max 50MB)

        **Processing time**: 1-5 minutes depending on file length
        """)

        gr.Markdown("""
        ---
        **Note**: This space uses Meta's Demucs for stem separation.
        Processing time depends on audio length and available compute resources.

        **Limitations**:
        - Max file size: ~50MB
        - Processing time: 1-5 minutes
        - Works best with clear, well-produced music
        """)

    return interface


# Create and launch the interface
if __name__ == "__main__":
    demo = create_hf_interface()
    demo.launch()