Spaces:

masszhou
/

bgmseparatorgpu

Running

App Files Files Community

masszhou committed on Apr 13

Commit

17d9938

1 Parent(s): 2e4d768

Add application file

Browse files

Files changed (4) hide show

app.py +647 -0
mdx_models/model_data.json +50 -0
pyproject.toml +30 -0
requirements.txt +10 -0

app.py ADDED Viewed

	@@ -0,0 +1,647 @@

+import os
+# os.system("pip install ./ort_nightly_gpu-1.17.0.dev20240118002-cp310-cp310-manylinux_2_28_x86_64.whl")
+os.system("pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/")
+import gc
+import hashlib
+import queue
+import threading
+import json
+import shlex
+import sys
+import subprocess
+import librosa
+import numpy as np
+import soundfile as sf
+import torch
+from tqdm import tqdm
+import random
+import spaces
+import onnxruntime as ort
+import warnings
+import spaces
+import gradio as gr
+import logging
+import time
+import traceback
+import numpy as np
+import yt_dlp
+from pathlib import Path
+from huggingface_hub import hf_hub_download
+from typing import Dict, Tuple
+MODEL_ID = "masszhou/mdxnet"
+MODELS_PATH = {
+    "bgm": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Inst_HQ_3.onnx")),
+    "basic_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR-MDX-NET-Voc_FT.onnx")),
+    "main_vocal": Path(hf_hub_download(repo_id=MODEL_ID, filename="UVR_MDXNET_KARA_2.onnx"))
+}
+STEM_NAMING = {
+    "Vocals": "Instrumental",
+    "Other": "Instruments",
+    "Instrumental": "Vocals",
+    "Drums": "Drumless",
+    "Bass": "Bassless",
+}
+class MDXModel:
+    def __init__(
+        self,
+        device,
+        dim_f,
+        dim_t,
+        n_fft,
+        hop=1024,
+        stem_name=None,
+        compensation=1.000,
+    ):
+        self.dim_f = dim_f
+        self.dim_t = dim_t
+        self.dim_c = 4
+        self.n_fft = n_fft
+        self.hop = hop
+        self.stem_name = stem_name
+        self.compensation = compensation
+        self.n_bins = self.n_fft // 2 + 1
+        self.chunk_size = hop * (self.dim_t - 1)
+        self.window = torch.hann_window(
+            window_length=self.n_fft, periodic=True
+        ).to(device)
+        out_c = self.dim_c
+        self.freq_pad = torch.zeros(
+            [1, out_c, self.n_bins - self.dim_f, self.dim_t]
+        ).to(device)
+    def stft(self, x):
+        x = x.reshape([-1, self.chunk_size])
+        x = torch.stft(
+            x,
+            n_fft=self.n_fft,
+            hop_length=self.hop,
+            window=self.window,
+            center=True,
+            return_complex=True,
+        )
+        x = torch.view_as_real(x)
+        x = x.permute([0, 3, 1, 2])
+        x = x.reshape([-1, 2, 2, self.n_bins, self.dim_t]).reshape(
+            [-1, 4, self.n_bins, self.dim_t]
+        )
+        return x[:, :, : self.dim_f]
+    def istft(self, x, freq_pad=None):
+        freq_pad = (
+            self.freq_pad.repeat([x.shape[0], 1, 1, 1])
+            if freq_pad is None
+            else freq_pad
+        )
+        x = torch.cat([x, freq_pad], -2)
+        # c = 4*2 if self.target_name=='*' else 2
+        x = x.reshape([-1, 2, 2, self.n_bins, self.dim_t]).reshape(
+            [-1, 2, self.n_bins, self.dim_t]
+        )
+        x = x.permute([0, 2, 3, 1])
+        x = x.contiguous()
+        x = torch.view_as_complex(x)
+        x = torch.istft(
+            x,
+            n_fft=self.n_fft,
+            hop_length=self.hop,
+            window=self.window,
+            center=True,
+        )
+        return x.reshape([-1, 2, self.chunk_size])
+class MDX:
+    DEFAULT_SR = 44100
+    # Unit: seconds
+    DEFAULT_CHUNK_SIZE = 0 * DEFAULT_SR
+    DEFAULT_MARGIN_SIZE = 1 * DEFAULT_SR
+    def __init__(
+        self, model_path: str, params: MDXModel, processor=0
+    ):
+        # Set the device and the provider (CPU or CUDA)
+        self.device = (
+            torch.device(f"cuda:{processor}")
+            if processor >= 0
+            else torch.device("cpu")
+        )
+        self.provider = (
+            ["CUDAExecutionProvider"]
+            if processor >= 0
+            else ["CPUExecutionProvider"]
+        )
+        self.model = params
+        # Load the ONNX model using ONNX Runtime
+        self.ort = ort.InferenceSession(model_path, providers=self.provider)
+        # Preload the model for faster performance
+        self.ort.run(
+            None,
+            {"input": torch.rand(1, 4, params.dim_f, params.dim_t).numpy()},
+        )
+        self.process = lambda spec: self.ort.run(
+            None, {"input": spec.cpu().numpy()}
+        )[0]
+        self.prog = None
+    @staticmethod
+    def get_hash(model_path):
+        try:
+            with open(model_path, "rb") as f:
+                f.seek(-10000 * 1024, 2)
+                model_hash = hashlib.md5(f.read()).hexdigest()
+        except: # noqa
+            model_hash = hashlib.md5(open(model_path, "rb").read()).hexdigest()
+        return model_hash
+    @staticmethod
+    def segment(
+        wave,
+        combine=True,
+        chunk_size=DEFAULT_CHUNK_SIZE,
+        margin_size=DEFAULT_MARGIN_SIZE,
+    ):
+        """
+        Segment or join segmented wave array
+        Args:
+            wave: (np.array) Wave array to be segmented or joined
+            combine: (bool) If True, combines segmented wave array.
+                If False, segments wave array.
+            chunk_size: (int) Size of each segment (in samples)
+            margin_size: (int) Size of margin between segments (in samples)
+        Returns:
+            numpy array: Segmented or joined wave array
+        """
+        if combine:
+            # Initializing as None instead of [] for later numpy array concatenation
+            processed_wave = None
+            for segment_count, segment in enumerate(wave):
+                start = 0 if segment_count == 0 else margin_size
+                end = None if segment_count == len(wave) - 1 else -margin_size
+                if margin_size == 0:
+                    end = None
+                if processed_wave is None:  # Create array for first segment
+                    processed_wave = segment[:, start:end]
+                else:  # Concatenate to existing array for subsequent segments
+                    processed_wave = np.concatenate(
+                        (processed_wave, segment[:, start:end]), axis=-1
+                    )
+        else:
+            processed_wave = []
+            sample_count = wave.shape[-1]
+            if chunk_size <= 0 or chunk_size > sample_count:
+                chunk_size = sample_count
+            if margin_size > chunk_size:
+                margin_size = chunk_size
+            for segment_count, skip in enumerate(
+                range(0, sample_count, chunk_size)
+            ):
+                margin = 0 if segment_count == 0 else margin_size
+                end = min(skip + chunk_size + margin_size, sample_count)
+                start = skip - margin
+                cut = wave[:, start:end].copy()
+                processed_wave.append(cut)
+                if end == sample_count:
+                    break
+        return processed_wave
+    def pad_wave(self, wave):
+        """
+        Pad the wave array to match the required chunk size
+        Args:
+            wave: (np.array) Wave array to be padded
+        Returns:
+            tuple: (padded_wave, pad, trim)
+                - padded_wave: Padded wave array
+                - pad: Number of samples that were padded
+                - trim: Number of samples that were trimmed
+        """
+        n_sample = wave.shape[1]
+        trim = self.model.n_fft // 2
+        gen_size = self.model.chunk_size - 2 * trim
+        pad = gen_size - n_sample % gen_size
+        # Padded wave
+        wave_p = np.concatenate(
+            (
+                np.zeros((2, trim)),
+                wave,
+                np.zeros((2, pad)),
+                np.zeros((2, trim)),
+            ),
+            1,
+        )
+        mix_waves = []
+        for i in range(0, n_sample + pad, gen_size):
+            waves = np.array(wave_p[:, i:i + self.model.chunk_size])
+            mix_waves.append(waves)
+        mix_waves = torch.tensor(mix_waves, dtype=torch.float32).to(
+            self.device
+        )
+        return mix_waves, pad, trim
+    def _process_wave(self, mix_waves, trim, pad, q: queue.Queue, _id: int):
+        """
+        Process each wave segment in a multi-threaded environment
+        Args:
+            mix_waves: (torch.Tensor) Wave segments to be processed
+            trim: (int) Number of samples trimmed during padding
+            pad: (int) Number of samples padded during padding
+            q: (queue.Queue) Queue to hold the processed wave segments
+            _id: (int) Identifier of the processed wave segment
+        Returns:
+            numpy array: Processed wave segment
+        """
+        mix_waves = mix_waves.split(1)
+        with torch.no_grad():
+            pw = []
+            for mix_wave in mix_waves:
+                self.prog.update()
+                spec = self.model.stft(mix_wave)
+                processed_spec = torch.tensor(self.process(spec))
+                processed_wav = self.model.istft(
+                    processed_spec.to(self.device)
+                )
+                processed_wav = (
+                    processed_wav[:, :, trim:-trim]
+                    .transpose(0, 1)
+                    .reshape(2, -1)
+                    .cpu()
+                    .numpy()
+                )
+                pw.append(processed_wav)
+        processed_signal = np.concatenate(pw, axis=-1)[:, :-pad]
+        q.put({_id: processed_signal})
+        return processed_signal
+    def process_wave(self, wave: np.array, mt_threads=1):
+        """
+        Process the wave array in a multi-threaded environment
+        Args:
+            wave: (np.array) Wave array to be processed
+            mt_threads: (int) Number of threads to be used for processing
+        Returns:
+            numpy array: Processed wave array
+        """
+        self.prog = tqdm(total=0)
+        chunk = wave.shape[-1] // mt_threads
+        waves = self.segment(wave, False, chunk)
+        # Create a queue to hold the processed wave segments
+        q = queue.Queue()
+        threads = []
+        for c, batch in enumerate(waves):
+            mix_waves, pad, trim = self.pad_wave(batch)
+            self.prog.total = len(mix_waves) * mt_threads
+            thread = threading.Thread(
+                target=self._process_wave, args=(mix_waves, trim, pad, q, c)
+            )
+            thread.start()
+            threads.append(thread)
+        for thread in threads:
+            thread.join()
+        self.prog.close()
+        processed_batches = []
+        while not q.empty():
+            processed_batches.append(q.get())
+        processed_batches = [
+            list(wave.values())[0]
+            for wave in sorted(
+                processed_batches, key=lambda d: list(d.keys())[0]
+            )
+        ]
+        assert len(processed_batches) == len(
+            waves
+        ), "Incomplete processed batches, please reduce batch size!"
+        return self.segment(processed_batches, True, chunk)
+@spaces.GPU()
+def run_mdx(
+    model_params,
+    output_dir,
+    model_path,
+    filename,
+    exclude_main=False,
+    exclude_inversion=False,
+    suffix=None,
+    invert_suffix=None,
+    denoise=False,
+    keep_orig=True,
+    m_threads=2,
+    device_base="cuda",
+):
+    if device_base == "cuda":
+        device = torch.device("cuda:0")
+        processor_num = 0
+        device_properties = torch.cuda.get_device_properties(device)
+        vram_gb = device_properties.total_memory / 1024**3
+        m_threads = 1 if vram_gb < 8 else (8 if vram_gb > 32 else 2)
+    else:
+        device = torch.device("cpu")
+        processor_num = -1
+        m_threads = 1
+    model_hash = MDX.get_hash(model_path)
+    mp = model_params.get(model_hash)
+    model = MDXModel(
+        device,
+        dim_f=mp["mdx_dim_f_set"],
+        dim_t=2 ** mp["mdx_dim_t_set"],
+        n_fft=mp["mdx_n_fft_scale_set"],
+        stem_name=mp["primary_stem"],
+        compensation=mp["compensate"],
+    )
+    mdx_sess = MDX(model_path, model, processor=processor_num)
+    wave, sr = librosa.load(filename, mono=False, sr=44100)
+    # normalizing input wave gives better output
+    peak = max(np.max(wave), abs(np.min(wave)))
+    wave /= peak
+    if denoise:
+        wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (
+            mdx_sess.process_wave(wave, m_threads)
+        )
+        wave_processed *= 0.5
+    else:
+        wave_processed = mdx_sess.process_wave(wave, m_threads)
+    # return to previous peak
+    wave_processed *= peak
+    stem_name = model.stem_name if suffix is None else suffix
+    main_filepath = None
+    if not exclude_main:
+        main_filepath = os.path.join(
+            output_dir,
+            f"{os.path.basename(os.path.splitext(filename)[0])}_{stem_name}.wav",
+        )
+        sf.write(main_filepath, wave_processed.T, sr)
+    invert_filepath = None
+    if not exclude_inversion:
+        diff_stem_name = (
+            stem_naming.get(stem_name)
+            if invert_suffix is None
+            else invert_suffix
+        )
+        stem_name = (
+            f"{stem_name}_diff" if diff_stem_name is None else diff_stem_name
+        )
+        invert_filepath = os.path.join(
+            output_dir,
+            f"{os.path.basename(os.path.splitext(filename)[0])}_{stem_name}.wav",
+        )
+        sf.write(
+            invert_filepath,
+            (-wave_processed.T * model.compensation) + wave.T,
+            sr,
+        )
+    if not keep_orig:
+        os.remove(filename)
+    del mdx_sess, wave_processed, wave
+    gc.collect()
+    torch.cuda.empty_cache()
+    return main_filepath, invert_filepath
+def run_mdx_beta(
+    model_params,
+    output_dir,
+    model_path,
+    filename,
+    exclude_main=False,
+    exclude_inversion=False,
+    suffix=None,
+    invert_suffix=None,
+    denoise=False,
+    keep_orig=True,
+    m_threads=2,
+    device_base="",
+):
+    m_threads = 1
+    duration = librosa.get_duration(filename=filename)
+    if duration >= 60 and duration <= 120:
+        m_threads = 8
+    elif duration > 120:
+        m_threads = 16
+    model_hash = MDX.get_hash(model_path)
+    device = torch.device("cpu")
+    processor_num = -1
+    mp = model_params.get(model_hash)
+    model = MDXModel(
+        device,
+        dim_f=mp["mdx_dim_f_set"],
+        dim_t=2 ** mp["mdx_dim_t_set"],
+        n_fft=mp["mdx_n_fft_scale_set"],
+        stem_name=mp["primary_stem"],
+        compensation=mp["compensate"],
+    )
+    mdx_sess = MDX(model_path, model, processor=processor_num)
+    wave, sr = librosa.load(filename, mono=False, sr=44100)
+    # normalizing input wave gives better output
+    peak = max(np.max(wave), abs(np.min(wave)))
+    wave /= peak
+    if denoise:
+        wave_processed = -(mdx_sess.process_wave(-wave, m_threads)) + (
+            mdx_sess.process_wave(wave, m_threads)
+        )
+        wave_processed *= 0.5
+    else:
+        wave_processed = mdx_sess.process_wave(wave, m_threads)
+    # return to previous peak
+    wave_processed *= peak
+    stem_name = model.stem_name if suffix is None else suffix
+    main_filepath = None
+    if not exclude_main:
+        main_filepath = os.path.join(
+            output_dir,
+            f"{os.path.basename(os.path.splitext(filename)[0])}_{stem_name}.wav",
+        )
+        sf.write(main_filepath, wave_processed.T, sr)
+    invert_filepath = None
+    if not exclude_inversion:
+        diff_stem_name = (
+            stem_naming.get(stem_name)
+            if invert_suffix is None
+            else invert_suffix
+        )
+        stem_name = (
+            f"{stem_name}_diff" if diff_stem_name is None else diff_stem_name
+        )
+        invert_filepath = os.path.join(
+            output_dir,
+            f"{os.path.basename(os.path.splitext(filename)[0])}_{stem_name}.wav",
+        )
+        sf.write(
+            invert_filepath,
+            (-wave_processed.T * model.compensation) + wave.T,
+            sr,
+        )
+    if not keep_orig:
+        os.remove(filename)
+    del mdx_sess, wave_processed, wave
+    gc.collect()
+    torch.cuda.empty_cache()
+    return main_filepath, invert_filepath
+def extract_bgm(mdx_model_params: Dict,
+                input_filename: Path,
+                model_bgm_path: Path,
+                output_dir: Path,
+                device_base: str = "cuda") -> Path:
+    """
+    Extract pure background music, remove vocals
+    """
+    background_path, _ = run_mdx(model_params=mdx_model_params,
+                                  input_filename=input_filename,
+                                  output_dir=output_dir,
+                                  model_path=model_bgm_path,
+                                  denoise=False,
+                                  device_base=device_base,
+                                  )
+    return background_path
+def extract_vocal(mdx_model_params: Dict,
+                  input_filename: Path,
+                  model_basic_vocal_path: Path,
+                  model_main_vocal_path: Path,
+                  output_dir: Path,
+                  main_vocals_flag: bool = False,
+                  device_base: str = "cuda") -> Path:
+    """
+    Extract vocals
+    """
+    # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
+    vocals_path, _ = run_mdx(mdx_model_params,
+                             input_filename,
+                             output_dir,
+                             model_basic_vocal_path,
+                             denoise=True,
+                             device_base=device_base,
+                             )
+    # If "main_vocals_flag" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main vocals (Main) from backup vocals/background vocals (Backup)
+    if main_vocals_flag:
+        time.sleep(2)
+        backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params,
+                                                       output_dir,
+                                                       model_main_vocal_path,
+                                                       vocals_path,
+                                                       denoise=True,
+                                                       device_base=device_base,
+                                                       )
+        vocals_path = main_vocals_path
+    return vocals_path
+def process_uvr_task(input_file_path: Path,
+                     output_dir: Path,
+                     models_path: Dict[str, Path],
+                     main_vocals_flag: bool = False,  # If "Main" is enabled, use UVR_MDXNET_KARA_2.onnx to further separate main and backup vocals
+                     ) -> Tuple[Path, Path]:
+    device_base = "cuda" if torch.cuda.is_available() else "cpu"
+    # load mdx model definition
+    with open("./mdx_models/model_data.json") as infile:
+        mdx_model_params = json.load(infile)  # type: Dict
+    output_dir.mkdir(parents=True, exist_ok=True)
+    input_file_path = convert_to_stereo_and_wav(input_file_path)  # type: Path
+    # 1. Extract pure background music, remove vocals
+    background_path = extract_bgm(mdx_model_params,
+                                  input_file_path,
+                                  models_path["bgm"],
+                                  output_dir,
+                                  device_base=device_base)
+    # 2. Separate vocals
+    # First use UVR-MDX-NET-Voc_FT.onnx basic vocal separation model
+    vocals_path = extract_vocal(mdx_model_params,
+                                input_file_path,
+                                models_path["basic_vocal"],
+                                models_path["main_vocal"],
+                                output_dir,
+                                main_vocals_flag=main_vocals_flag,
+                                device_base=device_base)
+    return background_path, vocals_path
+def get_model_params(model_path: Path) -> Dict:
+    """
+    Get model parameters from model path
+    """
+    with open(model_path / "model_data.json") as infile:
+        return json.load(infile)  # type: Dict
+def inference_mdx(audio_file: str) -> list[str]:
+    mdx_model_params = get_model_params(Path("./mdx_models"))
+    audio_file = convert_to_stereo_and_wav(Path(audio_file))  # resampling at 44100 Hz
+    device_base = "cuda" if torch.cuda.is_available() else "cpu"
+    output_dir = Path("./out/mdx")
+    os.makedirs(output_dir, exist_ok=True)
+    model_bgm_path = MODELS_PATH["bgm"]
+    background_path, vocal_path = run_mdx(
+        model_params=mdx_model_params,
+        input_filename=audio_file,
+        output_dir=output_dir,
+        model_path=model_bgm_path,
+        denoise=False,
+        device_base=device_base,
+        )
+    return str(vocal_path), str(background_path)
+if __name__ == "__main__":
+    # zero = torch.Tensor([0]).cuda()
+    # print(f"zero.device: {zero.device}")
+    app = gr.Interface(
+        fn = inference_mdx,
+        inputs = gr.Audio(type="filepath", label="Input"),
+        outputs = [gr.Audio(type="filepath", label="Vocals"),gr.Audio(type="filepath", label="BGM")],
+        title="MDXNET Music Source Separation",
+        article="<p style='text-align: center'><a href='https://arxiv.org/abs/2111.12203' target='_blank'>KUIELab-MDX-Net: A Two-Stream Neural Network for Music Demixing</a> | <a href='https://github.com/kuielab/mdx-net' target='_blank'>Github Repo</a> | <a href='https://github.com/kuielab/mdx-net/blob/main/LICENSE' target='_blank'>MIT License</a></p>",
+        api_name="mdxnet_separation",
+    )
+    app.launch()

mdx_models/model_data.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+    "77d07b2667ddf05b9e3175941b4454a0": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals",
+        "name": "UVR-MDX-NET-Voc_FT.onnx"
+    },
+    "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental",
+        "name": "UVR_MDXNET_KARA_2.onnx"
+    },
+    "cd5b2989ad863f116c855db1dfe24e39": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Other",
+        "name": "Reverb_HQ_By_FoxJoy.onnx"
+    },
+    "55657dd70583b0fedfba5f67df11d711": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental",
+        "name": "UVR-MDX-NET-Inst_HQ_3.onnx"
+    },
+    "cc63408db3d80b4d85b0287d1d7c9632": {
+        "compensate": 1.033,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental",
+        "name": "UVR-MDX-NET-Inst_HQ_2.onnx"
+    },
+    "0f2a6bc5b49d87d64728ee40e23bceb1": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental",
+        "name": "UVR-MDX-NET-Inst_HQ_4.onnx"
+    }
+}

pyproject.toml ADDED Viewed

	@@ -0,0 +1,30 @@

+[tool.poetry]
+name = "bgmseparatorgpu"
+version = "0.1.0"
+description = ""
+authors = ["Zhiliang Zhou <[email protected]>"]
+readme = "README.md"
+package-mode = false
+[tool.poetry.dependencies]
+python = ">=3.11,<3.13"
+gradio = "4.42.0"
+pydantic = "2.8.2"
+fastapi = "0.112.2"
+scipy = "^1.15.2"
+numpy = "^2.2.4"
+onnxruntime = "^1.21.0"
+torch = "^2.6.0"
+tqdm = "^4.67.1"
+librosa = "^0.11.0"
+soundfile = "^0.13.1"
+spaces = "^0.34.2"
+huggingface-hub = "^0.30.2"
+# Notebook tooling, kept out of [build-system], which only accepts
+# requires / build-backend / backend-path.
+[tool.poetry.group.dev.dependencies]
+jupyter = "^1.1.1"
+qtconsole = "^5.6.1"
+pyqt5 = "^5.15.11"
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+soundfile
+librosa
+torch==2.2.0
+pedalboard
+yt-dlp
+gradio==4.42.0
+pydantic==2.8.2
+fastapi==0.112.2
+scipy
+numpy