George-API committed on
Commit
2457cec
·
verified ·
1 Parent(s): eab09f4

Upload run_cloud_training.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. run_cloud_training.py +9 -0
run_cloud_training.py CHANGED
@@ -515,6 +515,15 @@ def train(config_path, dataset_name, output_dir):
515
  # Initialize model with our safe loading function
516
  logger.info("Loading pre-quantized model safely")
517
  dtype = torch.float16 if hardware_config.get("fp16", True) else None
 
 
 
 
 
 
 
 
 
518
  model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
519
 
520
  # Disable generation capabilities for research training
 
515
  # Initialize model with our safe loading function
516
  logger.info("Loading pre-quantized model safely")
517
  dtype = torch.float16 if hardware_config.get("fp16", True) else None
518
+
519
+ # Force eager attention implementation
520
+ os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
521
+ logger.info("Flash attention has been DISABLED globally via environment variable")
522
+
523
+ # Update hardware config to ensure eager attention
524
+ hardware_config["attn_implementation"] = "eager"
525
+ hardware_config["use_flash_attention"] = False
526
+
527
  model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
528
 
529
  # Disable generation capabilities for research training