Spaces:
Runtime error
Runtime error
| from speaker_encoder import inference as encoder | |
| from multiprocessing.pool import Pool | |
| from functools import partial | |
| from pathlib import Path | |
| # from utils import logmmse | |
| # from tqdm import tqdm | |
| # import numpy as np | |
| # import librosa | |
def embed_utterance(fpaths, encoder_model_fpath):
    """Compute the speaker embedding of one utterance and save it to disk.

    Args:
        fpaths: A ``(wav_fpath, embed_fpath)`` pair. ``wav_fpath`` is read with
            ``np.load``; the resulting embedding is written to ``embed_fpath``
            with ``np.save``.
        encoder_model_fpath: Path handed to ``encoder.load_model`` when the
            encoder reports that no model is loaded yet.
    """
    # Imported locally: the module-level numpy import is commented out, so
    # without this the np.load/np.save calls below raise NameError.
    import numpy as np

    # Only load the model when needed, so repeated calls in the same process
    # do not reload it every time.
    if not encoder.is_loaded():
        encoder.load_model(encoder_model_fpath)

    # Compute the speaker embedding of the utterance
    wav_fpath, embed_fpath = fpaths
    wav = np.load(wav_fpath)
    wav = encoder.preprocess_wav(wav)
    embed = encoder.embed_utterance(wav)
    np.save(embed_fpath, embed, allow_pickle=False)
def create_embeddings(outdir_root: Path, wav_dir: Path, encoder_model_fpath: Path, n_processes: int):
    """Create a speaker embedding for every utterance listed in ``train.txt``.

    Reads ``<outdir_root>/train.txt`` (one ``|``-separated record per line),
    takes field 0 as the input file name under ``<outdir_root>/audio`` and
    field 2 as the output file name under ``<outdir_root>/embeds``, then runs
    ``embed_utterance`` on each pair in a pool of worker processes.

    Args:
        outdir_root: Directory containing ``audio/`` and ``train.txt``; the
            ``embeds/`` directory is created inside it if missing.
        wav_dir: Currently ignored, see the note below.
        encoder_model_fpath: Speaker-encoder model path forwarded to
            ``embed_utterance``.
        n_processes: Number of worker processes in the pool.

    Raises:
        AssertionError: If ``audio/`` or ``train.txt`` does not exist under
            ``outdir_root``.
    """
    # Imported locally: the module-level tqdm import is commented out, so
    # without this the progress bar below raises NameError.
    from tqdm import tqdm

    # NOTE(review): the wav_dir argument is overridden here, exactly as in the
    # original code. Confirm whether callers expect their value to be honored.
    wav_dir = outdir_root.joinpath("audio")
    # These two paths previously used an undefined name `synthesizer_root`.
    metadata_fpath = outdir_root.joinpath("train.txt")
    assert wav_dir.exists() and metadata_fpath.exists()
    embed_dir = outdir_root.joinpath("embeds")
    embed_dir.mkdir(exist_ok=True)

    # Gather the input wave filepath and the target output embed filepath.
    # Blank lines are skipped so they cannot trigger an IndexError on m[2].
    with metadata_fpath.open("r") as metadata_file:
        metadata = [line.split("|") for line in metadata_file if line.strip()]
    fpaths = [(wav_dir.joinpath(m[0]), embed_dir.joinpath(m[2])) for m in metadata]

    # TODO: improve on the multiprocessing, it's terrible. Disk I/O is the bottleneck here.
    # Embed the utterances in separate processes. The pool is a context manager
    # so the workers are released once every result has been consumed.
    func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath)
    with Pool(n_processes) as pool:
        job = pool.imap(func, fpaths)
        # Draining the iterator inside the `with` block is what drives the work.
        list(tqdm(job, "Embedding", len(fpaths), unit="utterances"))