{
  "data": {
    "aspect_ratio_range": [0.5, 2.0],
    "area_range": [250000, 1000000],
    "clamp_max_depth": 1000.0,
    "center_augmentation": 0.5,
    "fov_range_absolute": [1, 179],
    "fov_range_relative": [0.01, 1.0],
    "image_augmentation": ["jittering", "jpeg_loss", "blurring"],
    "datasets": [
      {
        "name": "TartanAir",
        "path": "blobmnt/data_v3/TartanAir",
        "label_type": "synthetic",
        "index": ".index.txt",
        "depth": "depth.png",
        "weight": 4.8,
        "center_augmentation": 0.25,
        "fov_range_absolute": [30, 150],
        "fov_range_relative": [0.5, 1.0],
        "image_augmentation": ["jittering", "jpeg_loss", "blurring", "shot_noise"]
      }
    ]
  },
  "model_version": "v1",
  "model": {
    "encoder": "dinov2_vitl14",
    "remap_output": "exp",
    "intermediate_layers": 4,
    "dim_upsample": [256, 128, 64],
    "dim_times_res_block_hidden": 2,
    "num_res_blocks": 2,
    "num_tokens_range": [1200, 2500],
    "last_conv_channels": 32,
    "last_conv_size": 1
  },
  "optimizer": {
    "type": "AdamW",
    "params": [
      {"params": {"include": ["*"], "exclude": ["*backbone.*"]}, "lr": 1e-4},
      {"params": {"include": ["*backbone.*"]}, "lr": 1e-5}
    ]
  },
  "lr_scheduler": {
    "type": "SequentialLR",
    "params": {
      "schedulers": [
        {"type": "LambdaLR", "params": {"lr_lambda": ["1.0", "max(0.0, min(1.0, (epoch - 1000) / 1000))"]}},
        {"type": "StepLR", "params": {"step_size": 25000, "gamma": 0.5}}
      ],
      "milestones": [2000]
    }
  },
  "low_resolution_training_steps": 50000,
  "loss": {
    "invalid": {},
    "synthetic": {
      "global": {"function": "affine_invariant_global_loss", "weight": 1.0, "params": {"align_resolution": 32}},
      "patch_4": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 4, "align_resolution": 16, "num_patches": 16}},
      "patch_16": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 16, "align_resolution": 8, "num_patches": 256}},
      "patch_64": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 64, "align_resolution": 4, "num_patches": 4096}},
      "normal": {"function": "normal_loss", "weight": 1.0},
      "mask": {"function": "mask_l2_loss", "weight": 1.0}
    },
    "sfm": {
      "global": {"function": "affine_invariant_global_loss", "weight": 1.0, "params": {"align_resolution": 32}},
      "patch_4": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 4, "align_resolution": 16, "num_patches": 16}},
      "patch_16": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 16, "align_resolution": 8, "num_patches": 256}},
      "mask": {"function": "mask_l2_loss", "weight": 1.0}
    },
    "lidar": {
      "global": {"function": "affine_invariant_global_loss", "weight": 1.0, "params": {"align_resolution": 32}},
      "patch_4": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 4, "align_resolution": 16, "num_patches": 16}},
      "mask": {"function": "mask_l2_loss", "weight": 1.0}
    }
  }
}