# Direct3D / config.yaml
# Hugging Face page header (not part of the configuration):
#   DreamTechAI's picture
#   Upload config.yaml with huggingface_hub
#   1320afb verified
# VAE component.
# NOTE(review): the nesting below is reconstructed. The flattened original
# left `target`, `params` and every hyperparameter at the same level as
# `vae:`, which makes them duplicate top-level keys. Confirm against the
# upstream file.
vae:
  # Presumably a dotted import path that the loader instantiates with
  # `params` as keyword arguments; verify against the loading code.
  target: direct3d.models.vae.D3D_VAE
  params:
    triplane_res: 32
    triplane_dim: 32
    # Same value as `in_channels`/`out_channels` (16) in the `dit` section;
    # presumably these must stay in sync. TODO confirm.
    latent_dim: 16
    num_freqs: 8
    # 12 heads * 64 dims per head = 768; presumably the attention width.
    num_attention_heads: 12
    attention_head_dim: 64
    num_encoder_layers: 8
    num_geodecoder_layers: 5
    latents_scale: 2.45
# Diffusion transformer (DiT) component.
# NOTE(review): the nesting below is reconstructed. The flattened original
# left `target`, `params` and every hyperparameter at the same level as
# `dit:`, which makes them duplicate top-level keys. Confirm against the
# upstream file.
dit:
  # Presumably a dotted import path that the loader instantiates with
  # `params` as keyword arguments; verify against the loading code.
  target: direct3d.models.dit.D3D_DiT
  params:
    attention_bias: true
    # 16 heads * 72 dims per head = 1152; presumably the attention width.
    attention_head_dim: 72
    num_attention_heads: 16
    # Both conditioning widths are 1024; presumably the feature sizes of
    # the `semantic_encoder` and `pixel_encoder` outputs. TODO confirm.
    semantic_channels: 1024
    pixel_channels: 1024
    # Same value as `latent_dim` (16) in the `vae` section; presumably
    # these must stay in sync. TODO confirm.
    in_channels: 16
    out_channels: 16
    num_layers: 44
    patch_size: 2
    # 32 equals the vae `triplane_res`, and 96 = 3 * 32; presumably three
    # planes laid side by side. TODO confirm.
    sample_size: [32, 96]
# Semantic image-conditioning encoder.
# NOTE(review): the nesting below is reconstructed. The flattened original
# left `target`, `params` and `version` at the same level as
# `semantic_encoder:`, which makes them duplicate top-level keys. Confirm
# against the upstream file.
semantic_encoder:
  # Presumably a dotted import path that the loader instantiates with
  # `params` as keyword arguments; verify against the loading code.
  target: direct3d.models.condition.ClipImageEncoder
  params:
    # Looks like a Hugging Face Hub model id; confirm how the encoder
    # class resolves it.
    version: openai/clip-vit-large-patch14
# Pixel-level image-conditioning encoder.
# NOTE(review): the nesting below is reconstructed. The flattened original
# left `target`, `params` and `version` at the same level as
# `pixel_encoder:`, which makes them duplicate top-level keys. Confirm
# against the upstream file.
pixel_encoder:
  # Presumably a dotted import path that the loader instantiates with
  # `params` as keyword arguments; verify against the loading code.
  target: direct3d.models.condition.DinoEncoder
  params:
    # Looks like a Hugging Face Hub model id; confirm how the encoder
    # class resolves it.
    version: facebook/dinov2-large
# Noise scheduler used for sampling.
# NOTE(review): the nesting below is reconstructed. The flattened original
# left `target`, `params` and every setting at the same level as
# `scheduler:`, which makes them duplicate top-level keys. Confirm against
# the upstream file.
scheduler:
  # Presumably a dotted import path that the loader instantiates with
  # `params` as keyword arguments; verify against the loading code.
  target: diffusers.schedulers.EulerAncestralDiscreteScheduler
  params:
    # NOTE(review): these appear to match the diffusers defaults for this
    # scheduler; confirm against the diffusers version in use.
    num_train_timesteps: 1000
    beta_start: 0.0001
    beta_end: 0.02
    beta_schedule: "linear"
    prediction_type: "epsilon"