Upload run_cloud_training.py with huggingface_hub
run_cloud_training.py CHANGED (+8 -0)
@@ -21,6 +21,9 @@ from transformers.data.data_collator import DataCollatorMixin
 from peft import LoraConfig
 from unsloth import FastLanguageModel
 
+# Configure PyTorch memory allocator for better memory management
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
 # Disable flash attention globally
 os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
 
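The new allocator line sits at the top of the script for a reason: PyTorch reads PYTORCH_CUDA_ALLOC_CONF once, when the CUDA caching allocator first initializes, so it must be exported before the first CUDA allocation. A minimal standalone sketch, not part of this commit (the tensor shape is arbitrary, and the expandable_segments option requires a recent PyTorch build):

import os

# Must be set before torch touches CUDA. Expandable segments let the caching
# allocator grow existing memory segments instead of reserving new fixed-size
# blocks, which reduces fragmentation-driven out-of-memory errors.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch  # imported only after the env var is in place

if torch.cuda.is_available():
    # The first allocation initializes the caching allocator with the setting.
    x = torch.zeros((1024, 1024), device="cuda", dtype=torch.float16)
    print(f"allocated: {torch.cuda.memory_allocated() / 1e6:.1f} MB")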
@@ -514,6 +517,11 @@ def train(config_path, dataset_name, output_dir):
     dtype = torch.float16 if hardware_config.get("fp16", True) else None
     model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
 
+    # Disable generation capabilities for research training
+    logger.info("Disabling generation capabilities - Research training only")
+    model.config.is_decoder = False
+    model.config.task_specific_params = None
+
     # Try different approaches to apply LoRA
     logger.info("Applying LoRA to model")
 
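Both attributes touched by the second hunk are standard fields on a transformers model config, so the effect can be checked in isolation. A hedged sketch (the gpt2 checkpoint is a stand-in for illustration, not the model this script trains):

from transformers import AutoConfig

config = AutoConfig.from_pretrained("gpt2")  # illustrative checkpoint only
print(config.task_specific_params)  # gpt2 ships a 'text-generation' preset

# Mirror the commit: clear the decoder flag and the bundled generation
# presets so the saved config no longer advertises text-generation support.
config.is_decoder = False
config.task_specific_params = None
print(config.is_decoder, config.task_specific_params)  # False None

Note that this only edits configuration metadata; the model weights are untouched, which matches the commit's stated intent of keeping the run research-only.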