# @package _global_
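# Composes the vq16_t2i base config via the Hydra defaults list below and swaps the model group to the extra_large preset.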

defaults:
  - vq16_t2i
  - override /model: extra_large

data:
  train: combined_tokens
  valid: ${.train}
  precache: false
  streaming: false
  resolution: 256
  block_size: 128
  tokenizer_name_or_path: NousResearch/Llama-2-7b-hf
  wrap: true
  iterable: false
  webdataset_iterable: false
  webdataset_indexed: false
  unpaired: false
  dataset_type: null
  tokens_flip_collate: false
  n_val_samples: null
  n_train_samples: null
  n_duplicate_train: null
  n_duplicate_val: null
  raw_data_dir: null
  save_train_dataloader: true
  save_validation_dataloader: true
  tokenizers_parallelism: false
  token_data_dir: null
  force_disable_shuffle: false
  use_custom_tensordict_collate: true
  use_weighted_tensordict_sampler: true
  force_mp_spawn: false
  enable_cuda_in_tensordict_collate: false
  use_token_dataset: true
  keep_tensordict_on_disk: true
  move_tensordict_to_shm: false
  add_text_to_weighted_sampler: false
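  # Each training entry below points at a directory of pre-tokenized data; under the weighted tensordict
  # sampler, `weight` appears to set the relative sampling ratio (journeydb is drawn ~10x more often than
  # each datacomp shard).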
  data_dir_train:
  # - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/matrix/HPDv2_image_reward_v1_v2_v3/train
  #   weight: 15.0
  #   name: hpdv2
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
    weight: 1.0
    name: pixelprose
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/journeydb_train
    weight: 10.0
    name: journeydb_train
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_0_tokens
    weight: 1.0
    name: datacomp0
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_1_tokens
    weight: 1.0
    name: datacomp1
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_2_tokens
    weight: 1.0
    name: datacomp2
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_3_tokens
    weight: 1.0
    name: datacomp3
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_4_tokens
    weight: 1.0
    name: datacomp4
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/datacomp_1b_datacomp1b_5_tokens
    weight: 1.0
    name: datacomp5
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_grogu/datacomp_1b_datacomp1b_6_tokens
    weight: 1.0
    name: datacomp6
  data_dir_val:
  - dir: ${oc.env:DIFFUSION_DATA_DIR}/tokens/07_31_2024_matrix/pixelprose_tokens
    weight: 1.0
    name: dummy_1

model:
  img_length: ${eval:'(${data.resolution} // ${model.downscale_ratio})**2'}
  txt_length: ${eval:'${data.block_size} if ${.unified_model} else 0'}
  length: ${eval:'${.txt_length} + ${.img_length}'}
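  # Rough length arithmetic, assuming the vq16 tokenizer implies downscale_ratio = 16:
  # img_length = (256 // 16)**2 = 256, txt_length = block_size = 128 (unified_model is true), length = 384.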
  unified_model: true
  image_model: true
  text_model: true
  image_model_fid_eval: false
  force_argmax_valid_indices: true
  use_pretrained_img_emb: false
  rope_2d: true
  modality_embed: true
  norm_type: rms
  qk_norm: true
  sandwich_normalization: true
  text_vocab_size: 32001
  
loader:
  batch_size: 8
  eval_batch_size: ${eval:'${.batch_size} // 2'}
  desired_global_batch_size: 512
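  # eval_batch_size resolves to 8 // 2 = 4 per device; the gap between the per-device batch_size (8) and
  # desired_global_batch_size (512) is presumably closed by world size and/or gradient accumulation.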
  persistent_workers: true
  pin_memory: false
  num_workers: 0
  num_eval_workers: 0

eval:
  log_every_n_evals: -1
  log_every_n_fid: -1
  limit_val_batches_manual: 16
  generate_samples: true
  compute_generative_perplexity: false
  perplexity_batch_size: ${loader.eval_batch_size}
  cfg: 5.0
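  # cfg is presumably the classifier-free guidance scale used when generating eval samples.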
  num_val_metrics_standalone_samples: -1
  num_val_metrics_standalone_batches_per_device: -1
  auto_enhance_reward_config:
    dfn_score: 1.0
    laion_aesthetic_score: 1.0
    
trainer:
  log_flops: false
  log_every_n_steps: 10
  custom_ddp_bf16: true
  log_seperate_modal_losses: true
  limit_val_batches: 16
  softmin_snr: 5
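  # softmin_snr: 5 likely applies a soft min-SNR-style loss weighting over timesteps with gamma = 5.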
  text_loss_weight: 1.0
  img_loss_weight: 0.6
  use_gradient_checkpointing: false
  ckpt_steps: 20000
  ckpt_every_n_minutes: 180
  ckpt_recent_timeout_minutes: 10
  use_custom_ema: false
  ema: 0.0
  fsdp: true
  restart_on_failure: true
  eval_on_start: false
  val_check_interval: 100000000000
  scale_lr_by_batch_size: false
  watch_gradients: false
  compile: true
  mask_entire_modality: 0.15
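  # With probability 0.15 an entire modality (text or image) is fully masked during training, which
  # presumably provides the unconditional branch needed for cfg-guided sampling at eval time.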
  compile_flag_pos_emb: true
  multimodal_batches: true

optim:
  lr: 0.0001

sampling:
  steps: 128
  num_sample_batches: 2

wandb:
  mode: online

checkpointing:
  checkpoints_total_limit: 10
  use_automatic_naming: false

lr_scheduler:
  num_warmup_steps: 10000