# Model configuration for the `bbox_gen` model.
# NOTE(review): nesting (model -> name/args -> parameters) was reconstructed
# from key order; confirm against the loader that consumes this file.
model:
  name: bbox_gen
  args:
    # Encoder settings (all keys share the `encoder_` prefix).
    encoder_dim_feat: 448
    encoder_dim: 64
    encoder_heads: 4
    encoder_token_num: 2048
    encoder_qkv_bias: false
    encoder_use_ln_post: true
    encoder_use_checkpoint: true
    encoder_num_embed_freqs: 8
    encoder_embed_include_pi: false
    encoder_init_scale: 0.25
    encoder_random_fps: true
    encoder_learnable_query: true
    encoder_layers: 8

    # Upper bound on group size.
    # NOTE(review): presumably the max number of boxes per sample - confirm.
    max_group_size: 50

    # Decoder settings.
    # vocab_size equals bins + 3 (64 + BOS/EOS/PAD = 67); keep these in sync.
    vocab_size: 67
    decoder_hidden_size: 1024
    decoder_num_hidden_layers: 24
    decoder_ffn_dim: 4096
    decoder_heads: 16
    decoder_use_flash_attention: true
    decoder_gradient_checkpointing: false

    # Tokenization settings.
    # The special-token ids 64, 65, 66 sit directly after the 64 bin values
    # (assumes bin ids are 0-63 - TODO confirm).
    bins: 64
    BOS_id: 64
    EOS_id: 65
    PAD_id: 66
    # NOTE(review): 2187 - 1886 = 301 = 50 * 6 + 1, which suggests
    # max_length = voxel_token_length + max_group_size * 6 + 1 - confirm
    # before changing any of these three values independently.
    max_length: 2187
    voxel_token_length: 1886
    # NOTE(review): -1 is outside the 0-66 id range above; presumably a
    # sentinel that marks voxel-token positions - verify against the model.
    voxel_token_placeholder: -1