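# NOTE(review): the next three lines appear to be web-page residue rather than
# part of this module; kept as comments so the file remains valid Python.
# JasonSmithSO's picture
# Upload 578 files
# 8866644 verified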
"""
List of all VAE configs, with training parts stripped.
"""
# Registry of VAE presets, keyed by preset name.
#
# Every entry carries "type", "embed_scale" and "embed_dim"; the remaining
# keys depend on the entry's "type". For the "kl-fN" / "vq-fN" presets the
# N in the name equals that entry's "embed_scale".
vae_conf = {
    # ---------------------------------------------------------------
    # AutoencoderKL presets ("double_z" is True for all of them)
    # ---------------------------------------------------------------
    "kl-f4": {
        "type": "AutoencoderKL",
        "embed_scale": 4,
        "embed_dim": 3,
        "z_channels": 3,
        "double_z": True,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
    },
    # Default SD1.5 VAE
    "kl-f8": {
        "type": "AutoencoderKL",
        "embed_scale": 8,
        "embed_dim": 4,
        "z_channels": 4,
        "double_z": True,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 4, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
    },
    # 16 channel VAE from https://huggingface.co/ostris/vae-kl-f8-d16/tree/main
    "kl-f8-d16": {
        "type": "AutoencoderKL",
        "embed_scale": 8,
        "embed_dim": 16,
        "z_channels": 16,
        "double_z": True,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 1, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
    },
    "kl-f16": {
        "type": "AutoencoderKL",
        "embed_scale": 16,
        "embed_dim": 16,
        "z_channels": 16,
        "double_z": True,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 1, 2, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [16],
    },
    "kl-f32": {
        "type": "AutoencoderKL",
        "embed_scale": 32,
        "embed_dim": 64,
        "z_channels": 64,
        "double_z": True,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 1, 2, 2, 4, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [16, 8],
    },
    # ---------------------------------------------------------------
    # VQModel presets (these add "n_embed" and set "double_z" to False)
    # ---------------------------------------------------------------
    "vq-f4": {
        "type": "VQModel",
        "embed_scale": 4,
        "n_embed": 8192,
        "embed_dim": 3,
        "z_channels": 3,
        "double_z": False,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
    },
    "vq-f8": {
        "type": "VQModel",
        "embed_scale": 8,
        "n_embed": 16384,
        "embed_dim": 4,
        "z_channels": 4,
        "double_z": False,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [32],
    },
    "vq-f16": {
        "type": "VQModel",
        "embed_scale": 16,
        "n_embed": 16384,
        "embed_dim": 8,
        "z_channels": 8,
        "double_z": False,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 1, 2, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [16],
    },
    # ---------------------------------------------------------------
    # OpenAI Consistency Decoder (only the three shared keys are set)
    # ---------------------------------------------------------------
    "Consistency-Decoder": {
        "type": "ConsistencyDecoder",
        "embed_scale": 8,
        "embed_dim": 4,
    },
    # ---------------------------------------------------------------
    # SAI Video Decoder (same values as "kl-f8" apart from "type",
    # plus the extra "video_kernel_size" key)
    # ---------------------------------------------------------------
    "SDV-VideoDecoder": {
        "type": "AutoencoderKL-VideoDecoder",
        "embed_scale": 8,
        "embed_dim": 4,
        "z_channels": 4,
        "double_z": True,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 4, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
        "video_kernel_size": [3, 1, 1],
    },
    # ---------------------------------------------------------------
    # Kandinsky-3 (the only preset here with "ch" set to 256)
    # ---------------------------------------------------------------
    "MoVQ3": {
        "type": "MoVQ3",
        "embed_scale": 8,
        "embed_dim": 4,
        "double_z": False,
        "z_channels": 4,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 256,
        "ch_mult": [1, 2, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [32],
    },
}