Spaces:

PreciousMposa
/

audio

Configuration error

App Files Files Community

audio / demucs /evaluate.py

PreciousMposa

Upload 107 files

519d358 verified about 1 month ago

raw

history blame contribute delete

6.71 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	"""Test time evaluation, either using the original SDR from [Vincent et al. 2006]
	or the newest SDR definition from the MDX 2021 competition (this one will
	be reported as `nsdr` for `new sdr`).
	"""

	from concurrent import futures
	import logging

	from dora.log import LogProgress
	import numpy as np
	import musdb
	import museval
	import torch as th

	from .apply import apply_model
	from .audio import convert_audio, save_audio
	from . import distrib
	from .utils import DummyPoolExecutor


	logger = logging.getLogger(__name__)


	def new_sdr(references, estimates):
	"""
	Compute the SDR according to the MDX challenge definition.
	Adapted from AIcrowd/music-demixing-challenge-starter-kit (MIT license)
	"""
	assert references.dim() == 4
	assert estimates.dim() == 4
	delta = 1e-7 # avoid numerical errors
	num = th.sum(th.square(references), dim=(2, 3))
	den = th.sum(th.square(references - estimates), dim=(2, 3))
	num += delta
	den += delta
	scores = 10 * th.log10(num / den)
	return scores


	def eval_track(references, estimates, win, hop, compute_sdr=True):
	references = references.transpose(1, 2).double()
	estimates = estimates.transpose(1, 2).double()

	new_scores = new_sdr(references.cpu()[None], estimates.cpu()[None])[0]

	if not compute_sdr:
	return None, new_scores
	else:
	references = references.numpy()
	estimates = estimates.numpy()
	scores = museval.metrics.bss_eval(
	references, estimates,
	compute_permutation=False,
	window=win,
	hop=hop,
	framewise_filters=False,
	bsseval_sources_version=False)[:-1]
	return scores, new_scores


	def evaluate(solver, compute_sdr=False):
	"""
	Evaluate model using museval.
	compute_sdr=False means using only the MDX definition of the SDR, which
	is much faster to evaluate.
	"""

	args = solver.args

	output_dir = solver.folder / "results"
	output_dir.mkdir(exist_ok=True, parents=True)
	json_folder = solver.folder / "results/test"
	json_folder.mkdir(exist_ok=True, parents=True)

	# we load tracks from the original musdb set
	if args.test.nonhq is None:
	test_set = musdb.DB(args.dset.musdb, subsets=["test"], is_wav=True)
	else:
	test_set = musdb.DB(args.test.nonhq, subsets=["test"], is_wav=False)
	src_rate = args.dset.musdb_samplerate

	eval_device = 'cpu'

	model = solver.model
	win = int(1. * model.samplerate)
	hop = int(1. * model.samplerate)

	indexes = range(distrib.rank, len(test_set), distrib.world_size)
	indexes = LogProgress(logger, indexes, updates=args.misc.num_prints,
	name='Eval')
	pendings = []

	pool = futures.ProcessPoolExecutor if args.test.workers else DummyPoolExecutor
	with pool(args.test.workers) as pool:
	for index in indexes:
	track = test_set.tracks[index]

	mix = th.from_numpy(track.audio).t().float()
	if mix.dim() == 1:
	mix = mix[None]
	mix = mix.to(solver.device)
	ref = mix.mean(dim=0) # mono mixture
	mix = (mix - ref.mean()) / ref.std()
	mix = convert_audio(mix, src_rate, model.samplerate, model.audio_channels)
	estimates = apply_model(model, mix[None],
	shifts=args.test.shifts, split=args.test.split,
	overlap=args.test.overlap)[0]
	estimates = estimates * ref.std() + ref.mean()
	estimates = estimates.to(eval_device)

	references = th.stack(
	[th.from_numpy(track.targets[name].audio).t() for name in model.sources])
	if references.dim() == 2:
	references = references[:, None]
	references = references.to(eval_device)
	references = convert_audio(references, src_rate,
	model.samplerate, model.audio_channels)
	if args.test.save:
	folder = solver.folder / "wav" / track.name
	folder.mkdir(exist_ok=True, parents=True)
	for name, estimate in zip(model.sources, estimates):
	save_audio(estimate.cpu(), folder / (name + ".mp3"), model.samplerate)

	pendings.append((track.name, pool.submit(
	eval_track, references, estimates, win=win, hop=hop, compute_sdr=compute_sdr)))

	pendings = LogProgress(logger, pendings, updates=args.misc.num_prints,
	name='Eval (BSS)')
	tracks = {}
	for track_name, pending in pendings:
	pending = pending.result()
	scores, nsdrs = pending
	tracks[track_name] = {}
	for idx, target in enumerate(model.sources):
	tracks[track_name][target] = {'nsdr': [float(nsdrs[idx])]}
	if scores is not None:
	(sdr, isr, sir, sar) = scores
	for idx, target in enumerate(model.sources):
	values = {
	"SDR": sdr[idx].tolist(),
	"SIR": sir[idx].tolist(),
	"ISR": isr[idx].tolist(),
	"SAR": sar[idx].tolist()
	}
	tracks[track_name][target].update(values)

	all_tracks = {}
	for src in range(distrib.world_size):
	all_tracks.update(distrib.share(tracks, src))

	result = {}
	metric_names = next(iter(all_tracks.values()))[model.sources[0]]
	for metric_name in metric_names:
	avg = 0
	avg_of_medians = 0
	for source in model.sources:
	medians = [
	np.nanmedian(all_tracks[track][source][metric_name])
	for track in all_tracks.keys()]
	mean = np.mean(medians)
	median = np.median(medians)
	result[metric_name.lower() + "_" + source] = mean
	result[metric_name.lower() + "_med" + "_" + source] = median
	avg += mean / len(model.sources)
	avg_of_medians += median / len(model.sources)
	result[metric_name.lower()] = avg
	result[metric_name.lower() + "_med"] = avg_of_medians
	return result