George-API committed on
Commit
f3ab403
·
verified ·
1 Parent(s): f374bb6

Upload run_cloud_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. run_cloud_training.py +1 -1
run_cloud_training.py CHANGED
@@ -603,7 +603,7 @@ def train(config_path, dataset_name, output_dir):
603
  # For L40S GPU, we can use a larger batch size and shard model across the single GPU
604
  if "L40S" in gpu_info.name or gpu_info.total_memory > 40e9: # Check if it's L40S (>40GB VRAM)
605
  logger.info("Detected L40S GPU - optimizing for high-memory GPU")
606
- per_device_train_batch_size = training_config.get("per_device_train_batch_size", 6)
607
  logger.info(f"Using optimized batch size for L40S: {per_device_train_batch_size}")
608
  else:
609
  # Default to a smaller batch size for other GPUs
 
603
  # For L40S GPU, we can use a larger batch size and shard model across the single GPU
604
  if "L40S" in gpu_info.name or gpu_info.total_memory > 40e9: # Check if it's L40S (>40GB VRAM)
605
  logger.info("Detected L40S GPU - optimizing for high-memory GPU")
606
+ per_device_train_batch_size = training_config.get("per_device_train_batch_size", 4)
607
  logger.info(f"Using optimized batch size for L40S: {per_device_train_batch_size}")
608
  else:
609
  # Default to a smaller batch size for other GPUs