George-API committed
Commit a69e2f2 · verified · 1 Parent(s): 6704e73

Upload run_cloud_training.py with huggingface_hub

Files changed (1)
  1. run_cloud_training.py +42 -46
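For context, the commit message says the file was pushed with the `huggingface_hub` client. Below is a minimal sketch of that kind of upload; the repo id and token handling are placeholders, not details taken from this page.

```python
from huggingface_hub import HfApi

# Hypothetical example: repo_id is a placeholder, not the actual target repo.
api = HfApi()  # assumes a token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="run_cloud_training.py",
    path_in_repo="run_cloud_training.py",
    repo_id="George-API/example-repo",
    commit_message="Upload run_cloud_training.py with huggingface_hub",
)
```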
run_cloud_training.py CHANGED
@@ -403,7 +403,8 @@ def remove_training_marker():
 
 def load_model_safely(model_name, max_seq_length, dtype=None, use_flash_attention=False):
     """
-    Load the model with appropriate attention settings based on hardware capability
+    Load the model directly with HuggingFace, bypassing Unsloth optimizations
+    to avoid memory-efficient attention issues
     """
     logger.info(f"Loading model: {model_name}")
 
@@ -421,51 +422,39 @@ def load_model_safely(model_name, max_seq_length, dtype=None, use_flash_attentio
     )
 
     # Force eager implementation to avoid BMGHK format issues
-    attn_implementation = "eager"  # Use eager implementation to avoid BMGHK format issues
+    attn_implementation = "eager"
     logger.info(f"Forcing eager attention implementation to avoid BMGHK format issues")
 
-    # Try loading with unsloth
-    try:
-        logger.info("Loading model with unsloth optimizations")
-        model, tokenizer = FastLanguageModel.from_pretrained(
-            model_name=model_name,
-            max_seq_length=max_seq_length,
-            dtype=dtype,
-            quantization_config=bnb_config,
-            attn_implementation=attn_implementation
-        )
-        logger.info("Model loaded successfully with unsloth")
-
-        # Explicitly set attention implementation in model config
-        if hasattr(model, 'config'):
-            model.config.attn_implementation = attn_implementation
-            logger.info(f"Explicitly set model config attention implementation to {attn_implementation}")
-
-        return model, tokenizer
-
-    except Exception as e:
-        logger.warning(f"Unsloth loading failed: {e}")
-        logger.info("Falling back to standard Hugging Face loading...")
-
-        # Fallback to standard HF loading
-        config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
-
-        # Set attention implementation in config
-        config.attn_implementation = attn_implementation
-
-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            config=config,
-            device_map="auto",
-            torch_dtype=dtype or torch.float16,
-            quantization_config=bnb_config,
-            trust_remote_code=True,
-            attn_implementation=attn_implementation
-        )
-        logger.info("Model loaded successfully with standard HF loading")
-        return model, tokenizer
+    # Skip Unsloth and use standard HuggingFace loading
+    logger.info("Bypassing Unsloth optimizations to avoid memory-efficient attention issues")
+
+    # Load with standard HuggingFace
+    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+
+    # Set attention implementation in config
+    config.attn_implementation = attn_implementation
+
+    # Disable any custom attention mechanisms
+    if hasattr(config, "use_flash_attention"):
+        config.use_flash_attention = False
+    if hasattr(config, "use_memory_efficient_attention"):
+        config.use_memory_efficient_attention = False
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        config=config,
+        device_map="auto",
+        torch_dtype=dtype or torch.float16,
+        quantization_config=bnb_config,
+        trust_remote_code=True,
+        attn_implementation=attn_implementation,
+        use_flash_attention=False,
+        use_memory_efficient_attention=False
+    )
+    logger.info("Model loaded successfully with standard HF loading")
+    return model, tokenizer
 
 def train(config_path, dataset_name, output_dir):
     """Main training function - RESEARCH TRAINING PHASE ONLY"""
@@ -536,7 +525,10 @@ def train(config_path, dataset_name, output_dir):
 
     # Initialize model
     logger.info("Initializing model (preserving 4-bit quantization)")
-    max_seq_length = training_config.get("max_seq_length", 2048)
+
+    # Reduce max sequence length to avoid memory issues
+    max_seq_length = min(training_config.get("max_seq_length", 2048), 1024)
+    logger.info(f"Using reduced max sequence length: {max_seq_length} to avoid memory issues")
 
     # Create LoRA config directly
     logger.info("Creating LoRA configuration")
@@ -593,10 +585,14 @@ def train(config_path, dataset_name, output_dir):
         logger.warning("No reporting backends available - training metrics won't be logged")
 
     # Set up training arguments with correct parameters
+    # REDUCE BATCH SIZE to avoid memory issues with attention
+    per_device_train_batch_size = 1  # Reduced from default of 2
+    logger.info(f"Using reduced batch size: {per_device_train_batch_size} to avoid memory issues")
+
     training_args_dict = {
         "output_dir": output_dir,
         "num_train_epochs": training_config.get("num_train_epochs", 3),
-        "per_device_train_batch_size": training_config.get("per_device_train_batch_size", 2),
+        "per_device_train_batch_size": per_device_train_batch_size,
         "gradient_accumulation_steps": training_config.get("gradient_accumulation_steps", 4),
         "learning_rate": training_config.get("learning_rate", 2e-5),
         "lr_scheduler_type": training_config.get("lr_scheduler_type", "cosine"),