Upload run_cloud_training.py with huggingface_hub
run_cloud_training.py CHANGED  (+9 -0)
@@ -515,6 +515,15 @@ def train(config_path, dataset_name, output_dir):
     # Initialize model with our safe loading function
     logger.info("Loading pre-quantized model safely")
     dtype = torch.float16 if hardware_config.get("fp16", True) else None
+
+    # Force eager attention implementation
+    os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
+    logger.info("Flash attention has been DISABLED globally via environment variable")
+
+    # Update hardware config to ensure eager attention
+    hardware_config["attn_implementation"] = "eager"
+    hardware_config["use_flash_attention"] = False
+
     model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)

     # Disable generation capabilities for research training
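For reference, a minimal sketch of how an eager-attention override like this one typically reaches the model loader. The body of load_model_safely is not part of this diff, so the helper below is an assumption; the only known API used is attn_implementation="eager", the standard transformers from_pretrained keyword for selecting the attention backend.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model_safely(model_name, max_seq_length, dtype):
    # Hypothetical sketch: the real helper is defined elsewhere in
    # run_cloud_training.py and is not shown in this diff.
    # attn_implementation="eager" selects the plain PyTorch attention path,
    # consistent with the hardware_config override added in the patch.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=dtype,  # torch.float16 when fp16 is enabled, else None
        attn_implementation="eager",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.model_max_length = max_seq_length
    return model, tokenizer

Note the ordering in the patch: the environment variable and the hardware_config overrides are set before load_model_safely runs, so the loader can pick them up when it instantiates the model.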