import gradio as gr
import torch
import torchaudio
import tempfile
import os
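
# Expected runtime dependencies (an assumption -- e.g. a requirements.txt for a
# Hugging Face Space would list roughly: gradio, torch, torchaudio, demucs).
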

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def separate_stems(audio_file, model_name="htdemucs"):
    """
    Separate audio stems using Demucs
    """
    if audio_file is None:
        return None, None, None, None, "❌ Please upload an audio file"
    
    try:
        # Import demucs modules
        from demucs.pretrained import get_model
        from demucs.apply import apply_model
        from demucs.audio import save_audio
        
        # Load the model
        model = get_model(model_name)
        model.to(device)
        model.eval()
        
        # Load audio
        wav, sr = torchaudio.load(audio_file)
        
        # Ensure stereo
        if wav.shape[0] == 1:
            wav = wav.repeat(2, 1)
        elif wav.shape[0] > 2:
            wav = wav[:2]
        
        # Resample if necessary
        if sr != model.samplerate:
            resampler = torchaudio.transforms.Resample(sr, model.samplerate)
            wav = resampler(wav)
            sr = model.samplerate
        
        # Move to device
        wav = wav.to(device)
        
        # Apply the model
        with torch.no_grad():
            sources = apply_model(model, wav.unsqueeze(0))
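        # `sources` has shape (batch, num_sources, channels, samples).
        # Note: the official Demucs CLI additionally normalizes the mix by its
        # mean/std before apply_model and rescales the outputs; that step is
        # omitted here for simplicity.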
        
        # Get source names
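        # For the 4-stem models offered in the UI this is typically
        # ["drums", "bass", "other", "vocals"], in that order.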
        source_names = model.sources
        
        # Save separated sources to a directory that outlives this function.
        # (A TemporaryDirectory context manager would delete the files on exit,
        # before Gradio gets a chance to serve them back to the browser.)
        output_files = {}
        temp_dir = tempfile.mkdtemp()
        for i, source in enumerate(source_names):
            output_path = os.path.join(temp_dir, f"{source}.wav")
            # Move the stem to CPU before writing it to disk
            save_audio(sources[0, i].cpu(), output_path, sr)
            output_files[source] = output_path
        
        # Return the separated stems (assuming 4 stems: drums, bass, other, vocals)
        stems = [None] * 4
        status_msg = f"✅ Successfully separated into {len(source_names)} stems"
        
        for i, source in enumerate(source_names[:4]):  # Limit to 4 for UI
            if source in output_files:
                stems[i] = output_files[source]
        
        return tuple(stems + [status_msg])
        
    except Exception as e:
        error_msg = f"❌ Error during separation: {str(e)}"
        return None, None, None, None, error_msg
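
# Example direct call (hypothetical local file; the app normally goes through the UI):
#   drums, bass, other, vocals, status = separate_stems("song.wav", "htdemucs")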

def create_hf_interface():
    """Create Hugging Face Spaces compatible interface"""
    
    with gr.Blocks(
        title="🎵 Music Stem Separator",
        theme=gr.themes.Soft(),
    ) as interface:
        
        gr.Markdown("""
        # 🎵 Music Stem Separator
        
        Separate music into individual stems using **Meta's Demucs** model.
        Upload an audio file and get separated tracks for **drums**, **bass**, **other instruments**, and **vocals**.
        
        ⚡ **Powered by Demucs** - State-of-the-art source separation
        """)
        
        with gr.Row():
            with gr.Column():
                # Input
                audio_input = gr.Audio(
                    type="filepath",
                    label="🎼 Upload Music File"
                )
                
                model_choice = gr.Dropdown(
                    choices=[
                        ("HTDemucs (4 stems)", "htdemucs"),
                        ("HTDemucs FT (4 stems)", "htdemucs_ft"), 
                        ("MDX Extra (4 stems)", "mdx_extra")
                    ],
                    value="htdemucs",
                    label="🤖 Model"
                )
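                # Note: (label, value) tuples in `choices` require a reasonably
                # recent Gradio release; very old 3.x versions expect plain strings.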
                
                separate_btn = gr.Button(
                    "πŸŽ›οΈ Separate Stems", 
                    variant="primary"
                )
            
            with gr.Column():
                gr.Markdown("""
                ### ℹ️ About Stem Separation
                
                **What you'll get:**
                - 🥁 **Drums**: Percussion and rhythm
                - 🎸 **Bass**: Bass lines and low frequencies  
                - 🎹 **Other**: Instruments, synths, effects
                - 🎀 **Vocals**: Lead and backing vocals
                
                **Tips:**
                - Higher quality input = better separation
                - Processing takes 1-5 minutes depending on length
                - Works best with modern pop/rock music
                """)
        
        # Status
        status_output = gr.Textbox(
            label="Status",
            interactive=False
        )
        
        # Output stems
        gr.Markdown("### 🎢 Separated Stems")
        
        with gr.Row():
            drums_output = gr.Audio(
                label="🥁 Drums",
                interactive=False
            )
            bass_output = gr.Audio(
                label="🎸 Bass", 
                interactive=False
            )
        
        with gr.Row():
            other_output = gr.Audio(
                label="🎹 Other",
                interactive=False
            )
            vocals_output = gr.Audio(
                label="🎀 Vocals",
                interactive=False
            )
        
        # Connect the interface  
        separate_btn.click(
            fn=separate_stems,
            inputs=[audio_input, model_choice],
            outputs=[
                drums_output,
                bass_output, 
                other_output,
                vocals_output,
                status_output
            ]
        )
        
        # Usage instructions (rendered as plain Markdown rather than gr.Examples)
        gr.Markdown("""
        ### 🎵 Instructions
        1. Upload an audio file (MP3, WAV, FLAC)
        2. Choose a separation model  
        3. Click "Separate Stems"
        4. Download individual tracks when ready
        
        **Supported formats**: MP3, WAV, FLAC (max 50MB)
        **Processing time**: 1-5 minutes depending on file length
        """)
        
        gr.Markdown("""
        ---
        **Note**: This space uses Meta's Demucs for stem separation. Processing time depends on audio length and available compute resources.
        
        **Limitations**: 
        - Max file size: ~50MB
        - Processing time: 1-5 minutes
        - Works best with clear, well-produced music
        """)
    
    return interface

# Create and launch the interface
if __name__ == "__main__":
    demo = create_hf_interface()
    demo.launch()
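
# For local testing you could pass explicit options instead, e.g.:
#   demo.launch(server_name="0.0.0.0", server_port=7860)
# (illustrative values; the bare launch() above is fine on Hugging Face Spaces)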