Upload run_cloud_training.py with huggingface_hub
run_cloud_training.py CHANGED (+8 -0)
@@ -21,6 +21,9 @@ from transformers.data.data_collator import DataCollatorMixin
 from peft import LoraConfig
 from unsloth import FastLanguageModel
 
+# Configure PyTorch memory allocator for better memory management
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
 # Disable flash attention globally
 os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
 
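The new allocator line sits at the top of the script for a reason: PyTorch reads PYTORCH_CUDA_ALLOC_CONF once, when the CUDA caching allocator first initializes, so it must be exported before the first CUDA allocation. A minimal standalone sketch, not part of this commit (the tensor shape is arbitrary, and the expandable_segments option requires a recent PyTorch build):

import os

# Must be set before torch touches CUDA. Expandable segments let the caching
# allocator grow existing memory segments instead of reserving new fixed-size
# blocks, which reduces fragmentation-driven out-of-memory errors.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch  # imported only after the env var is in place

if torch.cuda.is_available():
    # The first allocation initializes the caching allocator with the setting.
    x = torch.zeros((1024, 1024), device="cuda", dtype=torch.float16)
    print(f"allocated: {torch.cuda.memory_allocated() / 1e6:.1f} MB")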
@@ -514,6 +517,11 @@ def train(config_path, dataset_name, output_dir):
     dtype = torch.float16 if hardware_config.get("fp16", True) else None
     model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
 
+    # Disable generation capabilities for research training
+    logger.info("Disabling generation capabilities - Research training only")
+    model.config.is_decoder = False
+    model.config.task_specific_params = None
+
     # Try different approaches to apply LoRA
     logger.info("Applying LoRA to model")
 
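Both attributes touched by the second hunk are standard fields on a transformers model config, so the effect can be checked in isolation. A hedged sketch (the gpt2 checkpoint is a stand-in for illustration, not the model this script trains):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("gpt2")  # illustrative checkpoint only
print(config.task_specific_params)  # gpt2 ships a 'text-generation' preset

# Mirror the commit: clear the decoder flag and the bundled generation
# presets so the saved config no longer advertises text-generation support.
config.is_decoder = False
config.task_specific_params = None
print(config.is_decoder, config.task_specific_params)  # False None

Note that this only edits configuration metadata; the model weights are untouched, which matches the commit's stated intent of keeping the run research-only.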