|
# General run settings.
# NOTE(review): every line in this view ends with a stray " |" (and blank lines are a bare "|").
# A standard YAML parser would not ignore that text, so confirm whether it is an
# extraction artifact before loading this file as-is.
# Identifier for this run/configuration.
name: sampler |
|
# Boolean flag; presumably enables TensorBoard logging — confirm against the consumer.
use_tb_logger: true |
|
# "~" is YAML null, i.e. no value is supplied here.
set_CUDA_VISIBLE_DEVICES: ~ |
|
# List containing a single GPU index (3).
# NOTE(review): how this interacts with the null set_CUDA_VISIBLE_DEVICES above is not visible here.
gpu_ids: [3] |
|
|
|
|
|
# Data loading settings and dataset locations.
# All paths below are relative ("./datasets/..."); the base directory they resolve
# against is not visible here — presumably the working directory, confirm.
batch_size: 4 |
|
# presumably the number of data-loader worker processes — confirm against the consumer.
num_workers: 1 |
|
# Image directories for the train and test splits.
train_img_dir: ./datasets/train_images |
|
test_img_dir: ./datasets/test_images |
|
# Directories named for segmentation ("segm") and pose ("densepose") inputs;
# the expected file layout inside them is not shown here.
segm_dir: ./datasets/segm |
|
pose_dir: ./datasets/densepose |
|
# Annotation entries for the train / val / test splits, all under ./datasets/texture_ann.
# NOTE(review): these have no file extension — may be directories or extension-less files; confirm.
train_ann_file: ./datasets/texture_ann/train |
|
val_ann_file: ./datasets/texture_ann/val |
|
test_ann_file: ./datasets/texture_ann/test |
|
# Integer factor of 2; which inputs are downsampled (and how) is decided by the consumer.
downsample_factor: 2 |
|
|
|
|
|
# Pretrained checkpoint locations (relative paths to .pth files).
# "ae" presumably stands for autoencoder — confirm against the loading code.
img_ae_path: ./pretrained_models/vqvae_top.pth |
|
segm_ae_path: ./pretrained_models/parsing_token.pth |
|
|
|
# Name of the model to use; presumably resolved to a class of the same name
# by the training/sampling code — confirm.
model_type: TransformerTextureAwareModel |
|
|
|
|
|
|
|
# Settings prefixed "img_"; presumably the architecture of the image autoencoder
# loaded from img_ae_path — these likely must match that checkpoint, confirm.
# Note img_embed_dim and img_z_channels are both 256.
img_embed_dim: 256 |
|
# 1024 entries; presumably the image codebook size — confirm.
img_n_embed: 1024 |
|
img_double_z: false |
|
img_z_channels: 256 |
|
img_resolution: 512 |
|
# Input and output channel counts are both 3.
img_in_channels: 3 |
|
img_out_ch: 3 |
|
# Base channel count and a five-entry multiplier list; presumably one multiplier
# per resolution level — confirm.
img_ch: 128 |
|
img_ch_mult: [1, 1, 2, 2, 4] |
|
img_num_res_blocks: 2 |
|
# Single-entry list (32); presumably the feature-map resolution(s) at which
# attention is applied — confirm.
img_attn_resolutions: [32] |
|
# Dropout is set to 0.0 here.
img_dropout: 0.0 |
|
|
|
|
|
# Settings prefixed "segm_"; presumably the architecture of the segmentation
# autoencoder loaded from segm_ae_path — these likely must match that checkpoint, confirm.
segm_double_z: false |
|
# Note segm_z_channels and segm_embed_dim (below) are both 32.
segm_z_channels: 32 |
|
segm_resolution: 512 |
|
# Input channels, output channels and segm_num_segm_classes (below) are all 24.
segm_in_channels: 24 |
|
segm_out_ch: 24 |
|
# Base channel count and a five-entry multiplier list; presumably one multiplier
# per resolution level — confirm.
segm_ch: 64 |
|
segm_ch_mult: [1, 1, 2, 2, 4] |
|
segm_num_res_blocks: 1 |
|
# Single-entry list (16); presumably the feature-map resolution(s) at which
# attention is applied — confirm.
segm_attn_resolutions: [16] |
|
# Dropout is set to 0.0 here.
segm_dropout: 0.0 |
|
segm_num_segm_classes: 24 |
|
# 1024 entries; same value as segm_codebook_size elsewhere in this file.
segm_n_embed: 1024 |
|
segm_embed_dim: 32 |
|
|
|
|
|
# Codebook and transformer settings.
# 18432 equals 1024 * 18, i.e. img_n_embed * texture_codebook_size as set in this file.
# NOTE(review): that relationship looks intentional — confirm before changing any of the three.
codebook_size: 18432 |
|
# Same value as segm_n_embed (1024) elsewhere in this file.
segm_codebook_size: 1024 |
|
texture_codebook_size: 18 |
|
# "bert_" keys: embedding width 512, 24 layers, 8 heads (as named); exact usage
# is defined by the model code, not visible here.
bert_n_emb: 512 |
|
bert_n_layers: 24 |
|
bert_n_head: 8 |
|
# 512 equals 32 * 16, the product of latent_shape below.
# NOTE(review): presumably block_size is the flattened latent length — confirm.
block_size: 512 |
|
latent_shape: [32, 16] |
|
# All three dropout probabilities ("pdrop") are 0.0 here.
embd_pdrop: 0.0 |
|
resid_pdrop: 0.0 |
|
attn_pdrop: 0.0 |
|
# 18 matches texture_codebook_size above and is separate from bert_n_head (8).
# NOTE(review): presumably the number of output heads rather than attention heads — confirm.
num_head: 18 |
|
|
|
|
|
# Loss and sampling settings. Both loss_type and mask_schedule are string selectors;
# the set of accepted values is defined by the consuming code, not visible here.
loss_type: reweighted_elbo |
|
mask_schedule: random |
|
|
|
# presumably the number of iterative steps used when sampling — confirm.
sample_steps: 256 |
|
|
|
|
|
# Training schedule and optimizer settings.
# NOTE(review): the units of val_freq and print_freq (epochs vs. iterations) are not
# visible here — confirm against the training loop.
val_freq: 5 |
|
print_freq: 100 |
|
# Weight decay is 0 here.
weight_decay: 0 |
|
# Fixed random seed value.
manual_seed: 2021 |
|
num_epochs: 100 |
|
# The explicit "!!float" tag forces the value to be resolved as a float; some YAML 1.1
# loaders would otherwise read dot-less exponent notation such as "1e-4" as a string.
lr: !!float 1e-4 |
|
# Learning-rate decay settings: mode "step", with gamma and step below.
# NOTE(review): if gamma is the multiplicative decay factor, 1.0 leaves the learning
# rate unchanged at every step — confirm this is intended.
lr_decay: step |
|
gamma: 1.0 |
|
# presumably the decay interval in epochs (50 of num_epochs = 100) — confirm.
step: 50 |
|
|