Spaces:

AnhP
/

RVC-GUI

Running

App Files Files Community

RVC-GUI / main /library /architectures /demucs_separator.py

AnhP

Upload 92 files

6cfcfea verified 5 months ago

raw

history blame

8.55 kB

	import os
	import sys
	import yaml
	import torch

	import numpy as np
	from hashlib import sha256

	sys.path.append(os.getcwd())

	from main.configs.config import Config
	from main.library.uvr5_separator import spec_utils, common_separator
	from main.library.uvr5_separator.demucs import hdemucs, states, apply

	translations = Config().translations
	sys.path.insert(0, os.path.join(os.getcwd(), "main", "library", "uvr5_separator"))

	DEMUCS_4_SOURCE_MAPPER = {common_separator.CommonSeparator.BASS_STEM: 0, common_separator.CommonSeparator.DRUM_STEM: 1, common_separator.CommonSeparator.OTHER_STEM: 2, common_separator.CommonSeparator.VOCAL_STEM: 3}


	class DemucsSeparator(common_separator.CommonSeparator):
	def __init__(self, common_config, arch_config):
	super().__init__(config=common_config)
	self.segment_size = arch_config.get("segment_size", "Default")
	self.shifts = arch_config.get("shifts", 2)
	self.overlap = arch_config.get("overlap", 0.25)
	self.segments_enabled = arch_config.get("segments_enabled", True)
	self.logger.debug(translations["demucs_info"].format(segment_size=self.segment_size, segments_enabled=self.segments_enabled))
	self.logger.debug(translations["demucs_info_2"].format(shifts=self.shifts, overlap=self.overlap))
	self.demucs_source_map = DEMUCS_4_SOURCE_MAPPER
	self.audio_file_path = None
	self.audio_file_base = None
	self.demucs_model_instance = None
	self.logger.info(translations["start_demucs"])

	def separate(self, audio_file_path):
	self.logger.debug(translations["start_separator"])
	source = None
	inst_source = {}
	self.audio_file_path = audio_file_path
	self.audio_file_base = os.path.splitext(os.path.basename(audio_file_path))[0]
	self.logger.debug(translations["prepare_mix"])
	mix = self.prepare_mix(self.audio_file_path)
	self.logger.debug(translations["demix"].format(shape=mix.shape))
	self.logger.debug(translations["cancel_mix"])
	self.demucs_model_instance = hdemucs.HDemucs(sources=["drums", "bass", "other", "vocals"])
	self.demucs_model_instance = get_demucs_model(name=os.path.splitext(os.path.basename(self.model_path))[0], repo=os.path.dirname(self.model_path))
	self.demucs_model_instance = apply.demucs_segments(self.segment_size, self.demucs_model_instance)
	self.demucs_model_instance.to(self.torch_device)
	self.demucs_model_instance.eval()
	self.logger.debug(translations["model_review"])
	source = self.demix_demucs(mix)
	del self.demucs_model_instance
	self.clear_gpu_cache()
	self.logger.debug(translations["del_gpu_cache_after_demix"])
	output_files = []
	self.logger.debug(translations["process_output_file"])

	if isinstance(inst_source, np.ndarray):
	self.logger.debug(translations["process_ver"])
	inst_source[self.demucs_source_map[common_separator.CommonSeparator.VOCAL_STEM]] = spec_utils.reshape_sources(inst_source[self.demucs_source_map[common_separator.CommonSeparator.VOCAL_STEM]], source[self.demucs_source_map[common_separator.CommonSeparator.VOCAL_STEM]])
	source = inst_source

	if isinstance(source, np.ndarray):
	source_length = len(source)
	self.logger.debug(translations["source_length"].format(source_length=source_length))
	self.logger.debug(translations["set_map"].format(part=source_length))

	match source_length:
	case 2: self.demucs_source_map = {common_separator.CommonSeparator.INST_STEM: 0, common_separator.CommonSeparator.VOCAL_STEM: 1}
	case 6: self.demucs_source_map = {common_separator.CommonSeparator.BASS_STEM: 0, common_separator.CommonSeparator.DRUM_STEM: 1, common_separator.CommonSeparator.OTHER_STEM: 2, common_separator.CommonSeparator.VOCAL_STEM: 3, common_separator.CommonSeparator.GUITAR_STEM: 4, common_separator.CommonSeparator.PIANO_STEM: 5}
	case _: self.demucs_source_map = DEMUCS_4_SOURCE_MAPPER

	self.logger.debug(translations["process_all_part"])

	for stem_name, stem_value in self.demucs_source_map.items():
	if self.output_single_stem is not None:
	if stem_name.lower() != self.output_single_stem.lower():
	self.logger.debug(translations["skip_part"].format(stem_name=stem_name, output_single_stem=self.output_single_stem))
	continue

	stem_path = os.path.join(f"{self.audio_file_base}_({stem_name})_{self.model_name}.{self.output_format.lower()}")
	self.final_process(stem_path, source[stem_value].T, stem_name)
	output_files.append(stem_path)

	return output_files

	def demix_demucs(self, mix):
	self.logger.debug(translations["starting_demix_demucs"])
	processed = {}
	mix = torch.tensor(mix, dtype=torch.float32)
	ref = mix.mean(0)
	mix = (mix - ref.mean()) / ref.std()
	mix_infer = mix

	with torch.no_grad():
	self.logger.debug(translations["model_infer"])
	sources = apply.apply_model(model=self.demucs_model_instance, mix=mix_infer[None], shifts=self.shifts, split=self.segments_enabled, overlap=self.overlap, static_shifts=1 if self.shifts == 0 else self.shifts, set_progress_bar=None, device=self.torch_device, progress=True)[0]

	sources = (sources * ref.std() + ref.mean()).cpu().numpy()
	sources[[0, 1]] = sources[[1, 0]]

	processed[mix] = sources[:, :, 0:None].copy()
	return np.concatenate([s[:, :, 0:None] for s in list(processed.values())], axis=-1)

	class LocalRepo:
	def __init__(self, root):
	self.root = root
	self.scan()

	def scan(self):
	self._models, self._checksums = {}, {}
	for filename in os.listdir(self.root):
	filepath = os.path.join(self.root, filename)
	if not os.path.isfile(filepath): continue

	if os.path.splitext(filename)[1] == ".th":
	stem = os.path.splitext(filename)[0]

	if "-" in stem:
	xp_sig, checksum = stem.split("-", 1)
	self._checksums[xp_sig] = checksum
	else: xp_sig = stem

	if xp_sig in self._models: raise RuntimeError(translations["del_all_but_one"].format(xp_sig=xp_sig))
	self._models[xp_sig] = filepath

	def has_model(self, sig):
	return sig in self._models

	def get_model(self, sig):
	try:
	file = self._models[sig]
	except KeyError:
	raise RuntimeError(translations["not_found_model_signature"].format(sig=sig))

	if sig in self._checksums: check_checksum(file, self._checksums[sig])
	return states.load_model(file)

	class BagOnlyRepo:
	def __init__(self, root, model_repo):
	self.root = root
	self.model_repo = model_repo
	self.scan()

	def scan(self):
	self._bags = {}
	for filename in os.listdir(self.root):
	filepath = os.path.join(self.root, filename)

	if os.path.isfile(filepath) and os.path.splitext(filename)[1] == ".yaml":
	stem = os.path.splitext(filename)[0]
	self._bags[stem] = filepath

	def get_model(self, name):
	try:
	yaml_file = self._bags[name]
	except KeyError:
	raise RuntimeError(translations["name_not_pretrained"].format(name=name))

	with open(yaml_file, 'r') as f:
	bag = yaml.safe_load(f)

	return apply.BagOfModels([self.model_repo.get_model(sig) for sig in bag["models"]], bag.get("weights"), bag.get("segment"))

	def check_checksum(path, checksum):
	sha = sha256()

	with open(path, "rb") as file:
	while 1:
	buf = file.read(2**20)
	if not buf: break
	sha.update(buf)

	actual_checksum = sha.hexdigest()[:len(checksum)]
	if actual_checksum != checksum: raise RuntimeError(translations["invalid_checksum"].format(path=path, checksum=checksum, actual_checksum=actual_checksum))

	def get_demucs_model(name, repo = None):
	model_repo = LocalRepo(repo)
	return (model_repo.get_model(name) if model_repo.has_model(name) else BagOnlyRepo(repo, model_repo).get_model(name)).eval()