import os
import re
import random
from scipy.io.wavfile import write, read
import numpy as np
import gradio as gr
import yt_dlp
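
# Runtime dependencies implied by the imports and the shell call below:
# gradio, yt-dlp, scipy, numpy, ffmpeg, and the audio-separator command-line tool.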
# Model dictionaries and lists
roformer_models = {
    'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
    'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
    'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
    'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
}
mdx23c_models = [
    'MDX23C_D1581.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
]
# More model lists...
output_format = ['wav', 'flac', 'mp3']
mdxnet_overlap_values = ['0.25', '0.5', '0.75', '0.99']
vrarch_window_size_values = ['320', '512', '1024']
demucs_overlap_values = ['0.25', '0.50', '0.75', '0.99']
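
# These choice lists back the Gradio dropdowns below; the selected overlap and
# segment size are forwarded to the audio-separator CLI as --mdxc_overlap and
# --mdxc_segment_size in roformer_separator().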
# Function to download audio
def download_audio(url):
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # FFmpegExtractAudio converts the download to wav, so swap the extension
        file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
    sample_rate, audio_data = read(file_path)
    audio_array = np.asarray(audio_data, dtype=np.int16)
    return sample_rate, audio_array
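
# Example (illustrative; requires ffmpeg for the wav conversion):
#   sample_rate, audio_array = download_audio("https://www.youtube.com/watch?v=...")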
# Function to separate audio using Roformer
def roformer_separator(audio, model, output_format, overlap, segment_size, denoise):
    directory = "./outputs"
    random_id = str(random.randint(10000, 99999))
    os.makedirs(directory, exist_ok=True)
    write(f'{random_id}.wav', audio[0], audio[1])
    full_roformer_model = roformer_models[model]
    command = (
        f"audio-separator {random_id}.wav --model_filename {full_roformer_model} "
        f"--output_dir={directory} --output_format={output_format} --normalization=0.9 "
        f"--mdxc_overlap={overlap} --mdxc_segment_size={segment_size}"
    )
    if denoise:
        command += " --mdx_enable_denoise"
    os.system(command)
    # Collect this run's stems; sort for a stable order (assumed to match the
    # output labels below) and pad with None in case fewer than three exist.
    files_list = sorted(os.path.join(directory, file) for file in os.listdir(directory) if re.search(random_id, file))
    files_list += [None] * max(0, 3 - len(files_list))
    stem1_file, stem2_file, stem3_file = files_list[:3]
    return stem1_file, stem2_file, stem3_file
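
# Note: roformer_separator shells out to the audio-separator CLI, so the
# audio-separator package must be installed and available on PATH.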
# Gradio interface
def process_audio(url, model, output_format, overlap, segment_size, denoise):
    sample_rate, audio_array = download_audio(url)
    stems = roformer_separator((sample_rate, audio_array), model, output_format, overlap, segment_size, denoise)
    return stems
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Hex Audio Separator")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        model_input = gr.Dropdown(choices=list(roformer_models.keys()), label="Roformer Model")
        format_input = gr.Dropdown(choices=output_format, label="Output Format")
        overlap_input = gr.Dropdown(choices=mdxnet_overlap_values, label="Overlap")
        segment_input = gr.Slider(0, 100, label="Segment Size")
        denoise_input = gr.Checkbox(label="Enable Denoise")
    output1 = gr.Audio(label="Vocals")
    output2 = gr.Audio(label="Instrumental")
    output3 = gr.Audio(label="Backing Vocals")
    submit_button = gr.Button("Process")
    submit_button.click(
        process_audio,
        inputs=[url_input, model_input, format_input, overlap_input, segment_input, denoise_input],
        outputs=[output1, output2, output3]
    )

demo.launch()