Spaces:

ahk-d
/

stem-remixer

Running

App Files Files Community

stem-remixer / app.py

ahk-d

Update app.py

250f4e9 verified 17 days ago

raw

history blame contribute delete

6.53 kB

	import gradio as gr
	import torch
	import torchaudio
	import numpy as np
	from pathlib import Path
	import tempfile
	import os

	# Pick the compute device once at import time: the GPU when PyTorch reports
	# CUDA as available, otherwise the CPU. The choice is echoed to stdout.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	print(f"Using device: {device}")

	def separate_stems(audio_file, model_name="htdemucs"):
	    """Separate an audio file into stems using a pretrained Demucs model.

	    Args:
	        audio_file: Path of the audio file to process, or None when no
	            file was provided.
	        model_name: Model identifier passed to
	            ``demucs.pretrained.get_model``.

	    Returns:
	        A 5-tuple ``(stem_0, stem_1, stem_2, stem_3, status)``. The four
	        stem entries are paths to WAV files, one per entry of
	        ``model.sources`` in that order (only the first four sources are
	        returned; missing slots are None). ``status`` is a human-readable
	        message. On any failure the four stems are None and ``status``
	        holds the error text; no exception propagates to the caller.
	    """
	    import shutil

	    if audio_file is None:
	        return None, None, None, None, "❌ Please upload an audio file"

	    try:
	        # Imported inside the try so that a missing/broken Demucs install is
	        # reported through the status message instead of crashing the app.
	        from demucs.pretrained import get_model
	        from demucs.apply import apply_model
	        from demucs.audio import save_audio

	        # Load the model and put it in inference mode on the chosen device.
	        model = get_model(model_name)
	        model.to(device)
	        model.eval()

	        # Load audio as a (channels, samples) tensor plus its sample rate.
	        wav, sr = torchaudio.load(audio_file)

	        # Force exactly two channels: duplicate mono, drop extras.
	        if wav.shape[0] == 1:
	            wav = wav.repeat(2, 1)
	        elif wav.shape[0] > 2:
	            wav = wav[:2]

	        # Resample to the rate the model declares.
	        if sr != model.samplerate:
	            resampler = torchaudio.transforms.Resample(sr, model.samplerate)
	            wav = resampler(wav)
	            sr = model.samplerate

	        wav = wav.to(device)

	        # Add a batch dimension and run separation without tracking gradients.
	        with torch.no_grad():
	            sources = apply_model(model, wav.unsqueeze(0))

	        source_names = model.sources

	        # mkdtemp rather than TemporaryDirectory: the returned paths are read
	        # after this function returns, so the directory must outlive the call.
	        # A TemporaryDirectory context would delete the files on exit and
	        # hand back dangling paths. Nothing here deletes the directory on
	        # success; cleanup is left to the host's temp-file policy.
	        temp_dir = tempfile.mkdtemp(prefix="stems_")
	        output_files = {}
	        try:
	            for i, source in enumerate(source_names):
	                output_path = os.path.join(temp_dir, f"{source}.wav")
	                # The separated tensor lives on `device`; move it to the CPU
	                # before writing (no-op when already on CPU).
	                # NOTE(review): confirm save_audio does not already do this.
	                save_audio(sources[0, i].cpu(), output_path, sr)
	                output_files[source] = output_path
	        except Exception:
	            # Do not leave a half-written directory behind on failure.
	            shutil.rmtree(temp_dir, ignore_errors=True)
	            raise

	        # The UI has four players, so return at most four stems, padded
	        # with None when the model exposes fewer sources.
	        stems = [output_files[source] for source in source_names[:4]]
	        stems += [None] * (4 - len(stems))
	        status_msg = f"✅ Successfully separated into {len(source_names)} stems"

	        return tuple(stems + [status_msg])

	    except Exception as e:
	        # Boundary handler: surface every failure in the status field.
	        error_msg = f"❌ Error during separation: {str(e)}"
	        return None, None, None, None, error_msg

	def create_hf_interface():
	    """Build the Gradio Blocks page for the stem separator and return it.

	    The page consists of an upload / model / button column next to an
	    explanatory column, a read-only status box, four read-only audio
	    players, and two closing help sections. Clicking the button calls
	    ``separate_stems`` with the uploaded file path and the selected model
	    value, and routes its five results to the four players and the status
	    box, in that order.
	    """
	    # Dropdown entries; the second element of each pair is the value that
	    # matches the dropdown's default ("htdemucs").
	    model_options = [
	        ("HTDemucs (4 stems)", "htdemucs"),
	        ("HTDemucs FT (4 stems)", "htdemucs_ft"),
	        ("MDX Extra (4 stems)", "mdx_extra")
	    ]

	    with gr.Blocks(title="🎵 Music Stem Separator", theme=gr.themes.Soft()) as ui:

	        # Page header.
	        gr.Markdown("""
	# 🎵 Music Stem Separator

	Separate music into individual stems using Meta's Demucs model.
	Upload an audio file and get separated tracks for drums, bass, other instruments, and vocals.

	⚡ Powered by Demucs - State-of-the-art source separation
	""")

	        with gr.Row():
	            # Left column: inputs and the trigger button.
	            with gr.Column():
	                upload = gr.Audio(type="filepath", label="🎼 Upload Music File")
	                model_picker = gr.Dropdown(
	                    choices=model_options,
	                    value="htdemucs",
	                    label="🤖 Model"
	                )
	                run_button = gr.Button("🎛️ Separate Stems", variant="primary")

	            # Right column: static explanation of the outputs.
	            with gr.Column():
	                gr.Markdown("""
	### ℹ️ About Stem Separation

	What you'll get:
	- 🥁 Drums: Percussion and rhythm
	- 🎸 Bass: Bass lines and low frequencies
	- 🎹 Other: Instruments, synths, effects
	- 🎤 Vocals: Lead and backing vocals

	Tips:
	- Higher quality input = better separation
	- Processing takes 1-3 minutes depending on length
	- Works best with modern pop/rock music
	""")

	        # Read-only status line fed by the last value separate_stems returns.
	        status_box = gr.Textbox(label="Status", interactive=False)

	        gr.Markdown("### 🎶 Separated Stems")

	        # Two rows of two players each.
	        with gr.Row():
	            drums_player = gr.Audio(label="🥁 Drums", interactive=False)
	            bass_player = gr.Audio(label="🎸 Bass", interactive=False)

	        with gr.Row():
	            other_player = gr.Audio(label="🎹 Other", interactive=False)
	            vocals_player = gr.Audio(label="🎤 Vocals", interactive=False)

	        # Order must match the tuple returned by separate_stems.
	        result_targets = [
	            drums_player,
	            bass_player,
	            other_player,
	            vocals_player,
	            status_box
	        ]
	        run_button.click(
	            fn=separate_stems,
	            inputs=[upload, model_picker],
	            outputs=result_targets
	        )

	        # Usage instructions.
	        gr.Markdown("""
	### 🎵 Instructions
	1. Upload an audio file (MP3, WAV, FLAC)
	2. Choose a separation model
	3. Click "Separate Stems"
	4. Download individual tracks when ready

	Supported formats: MP3, WAV, FLAC (max 50MB)
	Processing time: 1-5 minutes depending on file length
	""")

	        # Footer note; the limits listed are informational text only.
	        gr.Markdown("""
	---
	Note: This space uses Meta's Demucs for stem separation. Processing time depends on audio length and available compute resources.

	Limitations:
	- Max file size: ~50MB
	- Processing time: 1-5 minutes
	- Works best with clear, well-produced music
	""")

	    return ui

	# Script entry point: when the file is executed directly, build the UI and
	# start serving it. Nothing is launched on a plain import.
	if __name__ == "__main__":
	    demo = create_hf_interface()
	    demo.launch()