# File size: 1,017 Bytes
# 1320afb
# VAE component. `target` is the dotted path of the class to use and `params`
# holds its settings (presumably passed to that class as keyword arguments;
# confirm against the config loader).
# NOTE(review): nesting was restored by hand; confirm that `latents_scale`
# belongs inside `params` rather than directly under `vae`.
vae:
  target: direct3d.models.vae.D3D_VAE
  params:
    triplane_res: 32
    triplane_dim: 32
    latent_dim: 16  # same value as dit.params.in_channels / out_channels
    num_freqs: 8
    num_attention_heads: 12
    attention_head_dim: 64
    num_encoder_layers: 8
    num_geodecoder_layers: 5
    latents_scale: 2.45
# DiT component. `target` is the dotted path of the class to use and `params`
# holds its settings (presumably passed to that class as keyword arguments;
# confirm against the config loader).
# NOTE(review): nesting was restored by hand; confirm every key below,
# including `sample_size`, belongs inside `params`.
dit:
  target: direct3d.models.dit.D3D_DiT
  params:
    attention_bias: true
    attention_head_dim: 72
    num_attention_heads: 16
    semantic_channels: 1024
    pixel_channels: 1024
    in_channels: 16  # same value as vae.params.latent_dim
    out_channels: 16
    num_layers: 44
    patch_size: 2
    sample_size: [32, 96]
# Semantic image-conditioning encoder. `target` is the dotted path of the
# class to use; `version` looks like a Hugging Face Hub model id (confirm how
# the encoder class resolves it).
semantic_encoder:
  target: direct3d.models.condition.ClipImageEncoder
  params:
    version: openai/clip-vit-large-patch14
# Pixel-level image-conditioning encoder. `target` is the dotted path of the
# class to use; `version` looks like a Hugging Face Hub model id (confirm how
# the encoder class resolves it).
pixel_encoder:
  target: direct3d.models.condition.DinoEncoder
  params:
    version: facebook/dinov2-large
# Noise scheduler. `target` is the dotted path of the scheduler class in the
# `diffusers` package; the keys under `params` are presumably forwarded to its
# constructor as keyword arguments (confirm against the config loader).
scheduler:
  target: diffusers.schedulers.EulerAncestralDiscreteScheduler
  params:
    num_train_timesteps: 1000
    beta_start: 0.0001
    beta_end: 0.02
    beta_schedule: "linear"
    prediction_type: "epsilon"