global_batch_size: 256; micro_batch_size: 2
scripts/pretrain-core-model.yaml
@@ -67,8 +67,8 @@ train:
   global_batch_size: 256

   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size: 4
-
+  # micro_batch_size: 4
+  micro_batch_size: 2
   # micro_batch_size: 1

   # Number of iterations with learning rate warmup active (type: int, default: 2000)
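
The practical effect of this change: with the global batch held at 256, halving micro_batch_size doubles the number of gradient-accumulation steps each rank performs per optimizer step, trading some throughput for lower per-GPU activation memory. A minimal sketch of that arithmetic, assuming a litgpt-style setup where the number of data-parallel ranks (world_size below, an assumed parameter not present in this config) completes the relationship:

# Sketch of how global_batch_size, micro_batch_size, and the number of
# data-parallel ranks typically relate; world_size=8 is an assumption
# for illustration, not taken from the config.

def gradient_accumulation_steps(global_batch_size: int,
                                micro_batch_size: int,
                                world_size: int) -> int:
    """Micro-batches each rank accumulates before one optimizer step."""
    samples_per_step = micro_batch_size * world_size
    assert global_batch_size % samples_per_step == 0, (
        "global_batch_size must be divisible by micro_batch_size * world_size"
    )
    return global_batch_size // samples_per_step

# Old setting on a hypothetical 8 GPUs: 256 // (4 * 8) = 8 steps.
# New setting on the same 8 GPUs:       256 // (2 * 8) = 16 steps,
# i.e. half the per-step activation memory, twice the accumulation.
print(gradient_accumulation_steps(256, 2, 8))  # -> 16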