import os

import torch
import torch.multiprocessing

from Architectures.Aligner.CodecAlignerDataset import CodecAlignerDataset
from Architectures.Aligner.autoaligner_train_loop import train_loop as train_aligner
from Architectures.ToucanTTS.TTSDataset import TTSDataset
from Utility.path_to_transcript_dicts import *
from Utility.storage_config import MODELS_DIR


def prepare_aligner_corpus(transcript_dict, corpus_dir, lang, device, phone_input=False,
                           gpu_count=1,
                           rank=0):
    return CodecAlignerDataset(transcript_dict,
                               cache_dir=corpus_dir,
                               lang=lang,
                               loading_processes=10,  # can be increased on massive clusters, but the added overhead usually isn't worth it
                               device=device,
                               phone_input=phone_input,
                               gpu_count=gpu_count,
                               rank=rank)
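

# Illustrative sketch only (not from the original file): prepare_aligner_corpus can be
# called on its own to pre-build just the alignment cache for a dataset. The audio path,
# transcript, and cache directory below are hypothetical placeholders.
#
# example_transcripts = {"/data/my_dataset/audio_0001.wav": "Hello world."}  # hypothetical data
# aligner_data = prepare_aligner_corpus(example_transcripts,
#                                       corpus_dir="Corpora/my_dataset",  # hypothetical cache location
#                                       lang="eng",  # assuming ISO 639-3 style language codes
#                                       device=torch.device("cuda"))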


def prepare_tts_corpus(transcript_dict,
                       corpus_dir,
                       lang,
                       # For small datasets it's best to turn this off and instead inspect the data with the scorer if there are any issues.
                       fine_tune_aligner=True,
                       use_reconstruction=True,
                       phone_input=False,
                       save_imgs=False,
                       gpu_count=1,
                       rank=0):
    """
    create an aligner dataset,
    fine-tune an aligner,
    create a TTS dataset,
    return it.

    Automatically skips parts that have been done before.
    A usage sketch follows directly after this function.
    """
    if not os.path.exists(os.path.join(corpus_dir, "tts_train_cache.pt")):
        if fine_tune_aligner:
            aligner_dir = os.path.join(corpus_dir, "Aligner")
            aligner_loc = os.path.join(corpus_dir, "Aligner", "aligner.pt")
            if not os.path.exists(os.path.join(corpus_dir, "aligner_train_cache.pt")):
                # build and cache the aligner dataset; the returned object is not needed yet
                prepare_aligner_corpus(transcript_dict, corpus_dir=corpus_dir, lang=lang, phone_input=phone_input, device=torch.device("cuda"))
            if not os.path.exists(os.path.join(aligner_dir, "aligner.pt")):
                # this call reuses the cache created above, so it only loads the data
                aligner_datapoints = prepare_aligner_corpus(transcript_dict, corpus_dir=corpus_dir, lang=lang, phone_input=phone_input, device=torch.device("cuda"))
                if os.path.exists(os.path.join(MODELS_DIR, "Aligner", "aligner.pt")):
                    # a pretrained aligner checkpoint is available, so fine-tune from it
                    train_aligner(train_dataset=aligner_datapoints,
                                  device=torch.device("cuda"),
                                  save_directory=aligner_dir,
                                  steps=min(len(aligner_datapoints) // 2, 10000),  # relatively good finetuning heuristic
                                  batch_size=32 if len(aligner_datapoints) > 32 else len(aligner_datapoints) // 2,
                                  path_to_checkpoint=os.path.join(MODELS_DIR, "Aligner", "aligner.pt"),
                                  fine_tune=True,
                                  debug_img_path=aligner_dir,
                                  resume=False,
                                  use_reconstruction=use_reconstruction)
                else:
                    # no pretrained checkpoint available, so train an aligner from scratch
                    train_aligner(train_dataset=aligner_datapoints,
                                  device=torch.device("cuda"),
                                  save_directory=aligner_dir,
                                  steps=len(aligner_datapoints) // 2,  # relatively good heuristic
                                  batch_size=32 if len(aligner_datapoints) > 32 else len(aligner_datapoints) // 2,
                                  path_to_checkpoint=None,
                                  fine_tune=False,
                                  debug_img_path=aligner_dir,
                                  resume=False,
                                  use_reconstruction=use_reconstruction)
        else:
            aligner_loc = os.path.join(MODELS_DIR, "Aligner", "aligner.pt")
    else:
        aligner_loc = None
    return TTSDataset(transcript_dict,
                      acoustic_checkpoint_path=aligner_loc,
                      cache_dir=corpus_dir,
                      device=torch.device("cuda"),
                      lang=lang,
                      save_imgs=save_imgs,
                      gpu_count=gpu_count,
                      rank=rank)
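

# A hedged usage sketch of the pipeline above, assuming a transcript dict that maps audio
# file paths to their transcripts: build (or reuse) the aligner cache, fine-tune the
# aligner, and get back a TTSDataset ready for TTS training. The audio path, transcript,
# and corpus directory are hypothetical placeholders; replace them with real data.
if __name__ == "__main__":
    example_transcripts = {"/data/my_dataset/audio_0001.wav": "Hello world."}  # hypothetical data
    tts_data = prepare_tts_corpus(example_transcripts,
                                  corpus_dir="Corpora/my_dataset",  # hypothetical cache location
                                  lang="eng",  # assuming ISO 639-3 style language codes
                                  fine_tune_aligner=True,
                                  save_imgs=False)
    print("TTS dataset prepared and cached in Corpora/my_dataset")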