File size: 4,145 Bytes
8866644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
List of all VAE configs, with training parts stripped.
"""
def _backbone(ch_mult, attn_resolutions, ch=128):
    """Return the seven trailing keys shared by the full-size entries below.

    Only ``ch_mult``, ``attn_resolutions`` and ``ch`` differ between entries;
    ``resolution``, ``in_channels``, ``out_ch`` and ``num_res_blocks`` carry
    the same value everywhere they appear. The two lists are stored exactly
    as passed (no copy), so each entry hands in its own fresh literal. The
    key order here is deliberate: the result is spliced into an entry with
    ``**`` and must land in the same position and order as before.
    """
    return {
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": ch,
        "ch_mult": ch_mult,
        "num_res_blocks": 2,
        "attn_resolutions": attn_resolutions,
    }


# VAE configs keyed by name. Entries are grouped by their "type" value.
vae_conf = {
    ### AutoencoderKL ###
    "kl-f4": {
        "type": "AutoencoderKL",
        "embed_scale": 4,
        "embed_dim": 3,
        "z_channels": 3,
        "double_z": True,
        **_backbone([1, 2, 4], []),
    },
    # Default SD1.5 VAE
    "kl-f8": {
        "type": "AutoencoderKL",
        "embed_scale": 8,
        "embed_dim": 4,
        "z_channels": 4,
        "double_z": True,
        **_backbone([1, 2, 4, 4], []),
    },
    # 16 channel VAE from https://huggingface.co/ostris/vae-kl-f8-d16/tree/main
    "kl-f8-d16": {
        "type": "AutoencoderKL",
        "embed_scale": 8,
        "embed_dim": 16,
        "z_channels": 16,
        "double_z": True,
        **_backbone([1, 1, 2, 4], []),
    },
    "kl-f16": {
        "type": "AutoencoderKL",
        "embed_scale": 16,
        "embed_dim": 16,
        "z_channels": 16,
        "double_z": True,
        **_backbone([1, 1, 2, 2, 4], [16]),
    },
    "kl-f32": {
        "type": "AutoencoderKL",
        "embed_scale": 32,
        "embed_dim": 64,
        "z_channels": 64,
        "double_z": True,
        **_backbone([1, 1, 2, 2, 4, 4], [16, 8]),
    },
    ### VQModel ###
    # These entries additionally carry "n_embed", placed right after "embed_scale".
    "vq-f4": {
        "type": "VQModel",
        "embed_scale": 4,
        "n_embed": 8192,
        "embed_dim": 3,
        "z_channels": 3,
        "double_z": False,
        **_backbone([1, 2, 4], []),
    },
    "vq-f8": {
        "type": "VQModel",
        "embed_scale": 8,
        "n_embed": 16384,
        "embed_dim": 4,
        "z_channels": 4,
        "double_z": False,
        **_backbone([1, 2, 2, 4], [32]),
    },
    "vq-f16": {
        "type": "VQModel",
        "embed_scale": 16,
        "n_embed": 16384,
        "embed_dim": 8,
        "z_channels": 8,
        "double_z": False,
        **_backbone([1, 1, 2, 2, 4], [16]),
    },
    # OpenAI Consistency Decoder
    # The only entry without the shared trailing keys.
    "Consistency-Decoder": {
        "type": "ConsistencyDecoder",
        "embed_scale": 8,
        "embed_dim": 4,
    },
    # SAI Video Decoder
    # Same leading and trailing keys as "kl-f8", plus one extra key at the end.
    "SDV-VideoDecoder": {
        "type": "AutoencoderKL-VideoDecoder",
        "embed_scale": 8,
        "embed_dim": 4,
        "z_channels": 4,
        "double_z": True,
        **_backbone([1, 2, 4, 4], []),
        "video_kernel_size": [3, 1, 1],
    },
    # Kandinsky-3
    # Lists "double_z" before "z_channels" and is the only entry with ch=256.
    "MoVQ3": {
        "type": "MoVQ3",
        "embed_scale": 8,
        "embed_dim": 4,
        "double_z": False,
        "z_channels": 4,
        **_backbone([1, 2, 2, 4], [32], ch=256),
    },
}