# Model settings. Booleans are written in canonical lowercase and unset
# values as an explicit `null`, so YAML 1.1 and 1.2 loaders agree on the
# parsed types.
model:
  model_type: const_sde
  model_name: cond_unet
  image_size: [320, 320]
  input_keys: ['image', 'cond']
  # Unset: no checkpoint path is configured in this file.
  ckpt_path: null
  ignore_keys: []
  only_model: false
  timesteps: 1000
  # NOTE(review): -1 looks like a sentinel value; confirm against the consumer.
  train_sample: -1
  sampling_timesteps: 1
  loss_type: l2
  objective: pred_noise
  start_dist: normal
  perceptual_weight: 0
  scale_factor: 0.3
  scale_by_std: true
  default_scale: true
  scale_by_softsign: false
  # The explicit float tag is required: YAML 1.1 loaders such as PyYAML read a
  # dot-less `1e-4` as a string rather than a number.
  eps: !!float 1e-4
  weighting_loss: false
  # First-stage settings, nested under `model`.
  first_stage:
    # Same value as ddconfig.z_channels below.
    embed_dim: 3
    lossconfig:
      disc_start: 50001
      kl_weight: 0.000001
      disc_weight: 0.5
      disc_in_channels: 1
    ddconfig:
      double_z: true
      z_channels: 3
      resolution: [320, 320]
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult: [1, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
    # Unset: no first-stage checkpoint path is configured in this file.
    ckpt_path: null
  # U-Net settings, nested under `model`.
  unet:
    dim: 128
    cond_net: swin
    without_pretrain: false
    channels: 3
    out_mul: 1
    dim_mults: [1, 2, 4, 4]  # num_down = len(dim_mults)
    cond_in_dim: 3
    cond_dim: 128
    cond_dim_mults: [2, 4]  # num_down = len(cond_dim_mults)
    # Alternative (disabled) window sizes, kept for reference:
    # window_sizes1: [[4, 4], [2, 2], [1, 1], [1, 1]]
    # window_sizes2: [[4, 4], [2, 2], [1, 1], [1, 1]]
    # One [h, w] pair per entry of dim_mults (both lists have four entries).
    window_sizes1: [[8, 8], [4, 4], [2, 2], [1, 1]]
    window_sizes2: [[8, 8], [4, 4], [2, 2], [1, 1]]
    fourier_scale: 16
    cond_pe: false
    num_pos_feats: 128
    # NOTE(review): 80 = 320 / 4, presumably tied to model.image_size; confirm
    # before changing either value.
    cond_feature_size: [80, 80]

# Dataset settings.
data:
  name: edge
  # Machine-specific absolute path: point this at the local copy of the
  # dataset before running.
  img_folder: '/data/yeyunfan/edge_detection_datasets/datasets/BSDS_test'
  augment_horizontal_flip: true
  batch_size: 8
  num_workers: 4

# Sampling settings.
sampler:
  sample_type: 'slide'
  # NOTE(review): stride (240) is smaller than model.image_size (320), which
  # presumably makes neighbouring windows overlap; confirm against the sampler.
  stride: [240, 240]
  batch_size: 1
  sample_num: 300
  use_ema: true
  # Unset: neither an output folder nor a checkpoint path is configured here.
  save_folder: null
  ckpt_path: null