Seed-VC

Running on Zero

App Files Files Community

Seed-VC/configs/v2/vc_wrapper.yaml

Plachta

Upload 116 files

56a1295 verified 4 months ago

raw

history blame contribute delete

3 kB

	_target_: modules.v2.vc_wrapper.VoiceConversionWrapper
	sr: 22050
	hop_size: 256
	mel_fn:
	_target_: modules.audio.mel_spectrogram
	_partial_: true
	n_fft: 1024
	win_size: 1024
	hop_size: 256
	num_mels: 80
	sampling_rate: 22050
	fmin: 0
	fmax: null
	center: False
	cfm:
	_target_: modules.v2.cfm.CFM
	estimator:
	_target_: modules.v2.dit_wrapper.DiT
	time_as_token: true
	style_as_token: true
	uvit_skip_connection: false
	block_size: 8192
	depth: 13
	num_heads: 8
	hidden_dim: 512
	in_channels: 80
	content_dim: 512
	style_encoder_dim: 192
	class_dropout_prob: 0.1
	dropout_rate: 0.0
	attn_dropout_rate: 0.0
	cfm_length_regulator:
	_target_: modules.v2.length_regulator.InterpolateRegulator
	channels: 512
	is_discrete: true
	codebook_size: 2048
	sampling_ratios: [ 1, 1, 1, 1 ]
	f0_condition: false
	ar:
	_target_: modules.v2.ar.NaiveWrapper
	model:
	_target_: modules.v2.ar.NaiveTransformer
	config:
	_target_: modules.v2.ar.NaiveModelArgs
	dropout: 0.0
	rope_base: 10000.0
	dim: 768
	head_dim: 64
	n_local_heads: 2
	intermediate_size: 2304
	n_head: 12
	n_layer: 12
	vocab_size: 2049 # 1 + 1 for eos
	ar_length_regulator:
	_target_: modules.v2.length_regulator.InterpolateRegulator
	channels: 768
	is_discrete: true
	codebook_size: 32
	sampling_ratios: [ ]
	f0_condition: false
	style_encoder:
	_target_: modules.campplus.DTDNN.CAMPPlus
	feat_dim: 80
	embedding_size: 192
	content_extractor_narrow:
	_target_: modules.astral_quantization.default_model.AstralQuantizer
	tokenizer_name: "openai/whisper-small"
	ssl_model_name: "facebook/hubert-large-ll60k"
	ssl_output_layer: 18
	skip_ssl: true
	encoder: &bottleneck_encoder
	_target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
	dim: 512
	num_blocks: 12
	intermediate_dim: 1536
	dilation: 1
	input_dim: 1024
	quantizer:
	_target_: modules.astral_quantization.bsq.BinarySphericalQuantize
	codebook_size: 32 # codebook size, must be a power of 2
	dim: 512
	entropy_loss_weight: 0.1
	diversity_gamma: 1.0
	spherical: True
	enable_entropy_loss: True
	soft_entropy_loss: True
	content_extractor_wide:
	_target_: modules.astral_quantization.default_model.AstralQuantizer
	tokenizer_name: "openai/whisper-small"
	ssl_model_name: "facebook/hubert-large-ll60k"
	ssl_output_layer: 18
	encoder: *bottleneck_encoder
	quantizer:
	_target_: modules.astral_quantization.bsq.BinarySphericalQuantize
	codebook_size: 2048 # codebook size, must be a power of 2
	dim: 512
	entropy_loss_weight: 0.1
	diversity_gamma: 1.0
	spherical: True
	enable_entropy_loss: True
	soft_entropy_loss: True
	vocoder:
	_target_: modules.bigvgan.bigvgan.BigVGAN.from_pretrained
	pretrained_model_name_or_path: "nvidia/bigvgan_v2_22khz_80band_256x"
	use_cuda_kernel: false