Spaces:
Running
Running
| # -*- coding: utf-8 -*- | |
| # Copyright 2014 João Felipe Santos, [email protected] | |
| # | |
| # This file is part of the SRMRpy library, and is licensed under the | |
| # MIT license: https://github.com/jfsantos/SRMRpy/blob/master/LICENSE | |
| from __future__ import division | |
| import numpy as np | |
| #from scipy.signal import hamming | |
| from scipy.signal.windows import hamming | |
| from .hilbert import hilbert | |
| from .modulation_filters import compute_modulation_cfs, modulation_filterbank,\ | |
| modfilt | |
| from gammatone.fftweight import fft_gtgram | |
| from gammatone.filters import centre_freqs, make_erb_filters, erb_filterbank | |
| from scores.srmr.segmentaxis import segment_axis | |
| from scipy.io.wavfile import read as readwav | |
def calc_erbs(low_freq, fs, n_filters):
    """Return the equivalent rectangular bandwidth (ERB) of each filter.

    The centre frequencies are obtained from
    ``centre_freqs(fs, n_filters, low_freq)`` and each one is converted to a
    bandwidth using the Glasberg and Moore parameters.

    Args:
        low_freq: Lowest centre frequency, forwarded to ``centre_freqs``.
        fs: Sampling rate, forwarded to ``centre_freqs``.
        n_filters: Number of filters, forwarded to ``centre_freqs``.

    Returns:
        One bandwidth per centre frequency, in the same order in which
        ``centre_freqs`` returns the frequencies.
    """
    # Glasberg and Moore parameters.
    ear_q = 9.26449
    min_bw = 24.7
    order = 1

    centre = centre_freqs(fs, n_filters, low_freq)
    scaled_term = (centre / ear_q) ** order
    floor_term = min_bw ** order
    return (scaled_term + floor_term) ** (1 / order)
def calc_cutoffs(cfs, fs, q):
    """Compute the 3 dB cutoff frequencies of 2nd order bandpass filters.

    Args:
        cfs: Centre frequency or NumPy array of centre frequencies.
        fs: Sampling rate, in the same unit as ``cfs``.
        q: Quality factor of the filters.

    Returns:
        A tuple ``(lower, upper)`` holding the lower and upper cutoff
        frequencies; each has the same shape as ``cfs`` and they are
        placed symmetrically around ``cfs``.
    """
    # Normalised angular centre frequency (radians per sample).
    omega = 2*np.pi*cfs/fs
    # Distance from the centre frequency to either cutoff, converted back
    # from the normalised domain to the unit of ``cfs``.
    half_width = np.tan(omega/2)/q * fs / (2*np.pi)
    return cfs - half_width, cfs + half_width
def normalize_energy(energy, drange=30.0):
    """Limit ``energy`` to a dynamic range of ``drange`` dB below its peak.

    The peak is the largest value of ``energy`` averaged over its first
    axis. Entries below ``peak * 10**(-drange/10)`` are raised to that floor
    and entries above the peak are lowered to the peak.

    Args:
        energy: NumPy array of energies. It is modified in place.
        drange: Dynamic range to keep, in dB.

    Returns:
        The same ``energy`` array object, after clamping.
    """
    ceiling = np.max(np.mean(energy, axis=0))
    floor = ceiling * 10.0 ** (-drange / 10.0)

    # The floor is applied first; the ceiling mask is evaluated on the
    # already-floored array, exactly as two sequential assignments would.
    below = energy < floor
    energy[below] = floor
    above = energy > ceiling
    energy[above] = ceiling
    return energy
def cal_SRMR(x, fs, n_cochlear_filters=23, low_freq=125, min_cf=4, max_cf=128,
             fast=True, norm=False):
    """Compute the SRMR metric of a signal.

    Processing steps:

    1. Gammatone envelopes are computed, either with ``fft_gtgram``
       (``fast=True``, envelope rate fixed to 400 Hz) or as the magnitude of
       the Hilbert transform of ``erb_filterbank`` (envelope rate ``fs``).
    2. Every envelope goes through an 8-channel modulation filterbank with
       Q = 2.
    3. The filterbank outputs are cut into 256 ms frames with a 64 ms hop,
       Hamming-windowed, and the energy of each frame is accumulated.
    4. The ratio between the energy in the first four modulation channels
       and the energy in channels 5..K* is returned, where K* (5 to 8) is
       selected from the ERB of the acoustic channel at which the cumulative
       energy exceeds 90 %.

    Args:
        x: Input signal.
        fs: Sampling rate of ``x``.
        n_cochlear_filters: Number of filters in the acoustic filterbank.
        low_freq: Lowest centre frequency of the acoustic filterbank.
        min_cf: Centre frequency of the first modulation filter.
        max_cf: Centre frequency of the last modulation filter.
        fast: Use the faster version based on the gammatonegram.
        norm: Apply ``normalize_energy`` to the modulation energies.

    Returns:
        A tuple ``(ratio, energy)`` where ``ratio`` is the SRMR value and
        ``energy`` is an array of shape
        ``(n_cochlear_filters, 8, n_frames)``.

    Raises:
        IndexError: If no cumulative energy percentage exceeds 90 (for
            instance when the energies are not finite).
    """
    wLengthS = .256
    wIncS = .064

    # Computing gammatone envelopes
    if fast:
        mfs = 400.0
        gt_env = fft_gtgram(x, fs, 0.010, 0.0025, n_cochlear_filters, low_freq)
    else:
        cfs = centre_freqs(fs, n_cochlear_filters, low_freq)
        fcoefs = make_erb_filters(fs, cfs)
        gt_env = np.abs(hilbert(erb_filterbank(x, fcoefs)))
        mfs = fs

    wLength = int(np.ceil(wLengthS*mfs))
    wInc = int(np.ceil(wIncS*mfs))

    # Computing modulation filterbank with Q = 2 and 8 channels
    mod_filter_cfs = compute_modulation_cfs(min_cf, max_cf, 8)
    MF = modulation_filterbank(mod_filter_cfs, mfs, 2)

    n_frames = int(1 + (gt_env.shape[1] - wLength)//wInc)
    w = hamming(wLength+1)[:-1]  # window is periodic, not symmetric

    energy = np.zeros((n_cochlear_filters, 8, n_frames))
    for i, ac_ch in enumerate(gt_env):
        mod_out = modfilt(MF, ac_ch)
        for j, mod_ch in enumerate(mod_out):
            mod_out_frame = segment_axis(mod_ch, wLength,
                                         overlap=wLength-wInc,
                                         end='pad')
            # Padding may yield extra frames; only the first n_frames count.
            energy[i, j, :] = np.sum((w*mod_out_frame[:n_frames])**2, axis=1)

    if norm:
        energy = normalize_energy(energy)

    # Both the ERBs and the per-channel percentages are flipped so that the
    # cumulative sum and the bandwidth lookup use the same channel order.
    erbs = np.flipud(calc_erbs(low_freq, fs, n_cochlear_filters))

    avg_energy = np.mean(energy, axis=2)
    total_energy = np.sum(avg_energy)

    AC_energy = np.sum(avg_energy, axis=1)
    AC_perc = AC_energy*100/total_energy

    AC_perc_cumsum = np.cumsum(np.flipud(AC_perc))
    K90perc_idx = np.where(AC_perc_cumsum > 90)[0][0]

    BW = erbs[K90perc_idx]

    # NOTE(review): the cutoffs use the audio rate ``fs`` while the
    # modulation filterbank above is designed at ``mfs`` - confirm intended.
    cutoffs = calc_cutoffs(mod_filter_cfs, fs, 2)[0]

    # Pick the last modulation channel entering the denominator. The chain
    # is total: a bandwidth at or below cutoffs[4], or exactly equal to one
    # of the cutoffs, previously matched no branch and left Kstar undefined.
    # Such values now fall into the band below, with 5 as the minimum so the
    # denominator always covers at least one channel.
    if BW > cutoffs[7]:
        Kstar = 8
    elif BW > cutoffs[6]:
        Kstar = 7
    elif BW > cutoffs[5]:
        Kstar = 6
    else:
        Kstar = 5

    return np.sum(avg_energy[:, :4])/np.sum(avg_energy[:, 4:Kstar]), energy
def process_file(f, args):
    """Compute the SRMR value of a single WAV file.

    Args:
        f: Path of the WAV file, handed to ``scipy.io.wavfile.read``.
        args: Object exposing the attributes ``n_cochlear_filters``,
            ``min_cf``, ``max_cf``, ``fast`` and ``norm`` (the namespace
            built by ``main``).

    Returns:
        A tuple ``(f, r)`` with the input path and its SRMR value.
    """
    fs, s = readwav(f)
    if s.ndim > 1:
        # Multi-channel file: only the first channel is analysed.
        s = s[:, 0]
    # ``np.int`` was removed from NumPy; ``np.integer`` matches every
    # integer sample format.
    if np.issubdtype(s.dtype, np.integer):
        # Scale integer PCM samples by the largest value of their dtype.
        s = s.astype('float')/np.iinfo(s.dtype).max
    # The per-frame energies returned alongside the score are not needed.
    r, _ = cal_SRMR(
        s, fs, n_cochlear_filters=args.n_cochlear_filters,
        min_cf=args.min_cf,
        max_cf=args.max_cf,
        fast=args.fast,
        norm=args.norm)
    return f, r
def main():
    """Command-line entry point: print the SRMR value of each given file.

    With several paths the files are processed in parallel by a pool with
    one worker per CPU; results are printed in the order the paths were
    given. With a single path the file is processed in the current process.
    """
    import argparse
    import multiprocessing
    import functools

    parser = argparse.ArgumentParser(
        description='Compute the SRMR metric for a given WAV file')
    parser.add_argument(
        '-f', '--fast', dest='fast', action='store_true', default=False,
        help='Use the faster version based on the gammatonegram')
    parser.add_argument(
        '-n', '--norm', dest='norm', action='store_true', default=False,
        help='Use modulation spectrum energy normalization')
    parser.add_argument(
        '--ncochlearfilters', dest='n_cochlear_filters', type=int, default=23,
        help='Number of filters in the acoustic filterbank')
    parser.add_argument(
        '--mincf', dest='min_cf', type=float, default=4.0,
        help='Center frequency of the first modulation filter')
    parser.add_argument(
        '--maxcf', dest='max_cf', type=float, default=128.0,
        help='Center frequency of the last modulation filter')
    # NOTE(review): the help text mentions folders, but every path is passed
    # straight to process_file - confirm whether folders are supported.
    parser.add_argument(
        'path', metavar='path', nargs='+',
        help='Path of the file or files to be processed.'
        ' Can also be a folder.')
    args = parser.parse_args()

    if len(args.path) > 1:
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        try:
            results = dict(pool.map(functools.partial(process_file, args=args),
                                    args.path))
        finally:
            # Always release the worker processes, even if a file fails.
            pool.close()
            pool.join()
        for f in args.path:
            print('{}: {}'.format(f, results[f]))
    else:
        f, r = process_file(args.path[0], args)
        print('{}: {}'.format(f, r))


if __name__ == '__main__':
    main()