mtasic85 committed
Commit bee417a · Parent: 0f5ef2e

global_batch_size: 256; micro_batch_size: 4

Files changed (1):
  1. scripts/pretrain-core-model.yaml (+3 -4)
scripts/pretrain-core-model.yaml CHANGED
@@ -63,12 +63,11 @@ train:
   log_interval: 1
 
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
- global_batch_size: 512
- # global_batch_size: 256
+ # global_batch_size: 512
+ global_batch_size: 256
 
   # Number of samples per data-parallel rank (type: int, default: 4)
- # micro_batch_size: 4
- micro_batch_size: 3
+ micro_batch_size: 4
   # micro_batch_size: 2
   # micro_batch_size: 1
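
In litgpt-style pretraining configs, these two values together with the number of data-parallel devices determine how many micro-batches are accumulated per optimizer step. A minimal sketch of that arithmetic, assuming a single device (the function name and the devices count are illustrative, not part of this commit):

```python
def gradient_accumulation_iters(global_batch_size: int,
                                micro_batch_size: int,
                                devices: int) -> int:
    """Micro-batches accumulated per rank before each optimizer step,
    so that devices * micro_batch_size * iters == global_batch_size."""
    if global_batch_size % devices:
        raise ValueError("global_batch_size must be divisible by devices")
    per_rank = global_batch_size // devices  # samples per rank per optimizer step
    if per_rank % micro_batch_size:
        raise ValueError("per-rank batch size must be divisible by micro_batch_size")
    return per_rank // micro_batch_size

# This commit's values on a hypothetical single GPU:
# 256 // (1 * 4) = 64 accumulation iterations per optimizer step.
print(gradient_accumulation_iters(global_batch_size=256, micro_batch_size=4, devices=1))
```

One plausible motivation for the change: the new pair divides evenly (256 / 4 = 64), whereas the old pair (512, 3) did not.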