# train_llama4.py
import os

import datasets
import torch
import huggingface_hub
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Llama4ForConditionalGeneration,
    Trainer,
    TrainingArguments,
)

print("Running train_llama4.py with CPU offloading (version: 2025-04-22 v1)")

# Authenticate with Hugging Face
LLAMA = os.getenv("LLama")
if not LLAMA:
    raise ValueError("LLama token not found. Set it in environment as 'LLama'.")
huggingface_hub.login(token=LLAMA)

# Tokenizer
MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})

# Quantization + CPU offload config
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

print("Loading model with 8-bit quantization, CPU offload, and automatic device mapping")
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quant_config,
    offload_folder="./offload",
)

# Resize embeddings if a pad token was added, and register its id
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id

# Note: Trainer drives Accelerate internally, so the quantized, device-mapped
# model must not be wrapped in accelerator.prepare() here.

# Load and tokenize training data.
# Assumes each JSON record has a "text" field; adjust the key to match the file's schema.
dataset = datasets.load_dataset("json", data_files="Bingaman_training_data.json")["train"]

def tokenize_fn(examples):
    return tokenizer(examples["text"], truncation=True, max_length=2048)

dataset = dataset.map(tokenize_fn, batched=True, remove_columns=dataset.column_names)

# Pads batches and copies input_ids into labels for causal-LM training
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# LoRA setup
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

# Training arguments (bf16 matches the bfloat16 compute dtype used above)
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    optim="adamw_torch",
    save_steps=500,
    logging_steps=100,
    learning_rate=2e-4,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator,
)

# Run training and save the LoRA adapter plus the (resized) tokenizer
trainer.train()
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
print("Training completed!")
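
# Optional post-training smoke test: a minimal sketch that generates a short
# completion from the freshly tuned adapter still in memory. The prompt string
# and max_new_tokens value are illustrative assumptions, not part of the
# training recipe; with heavy CPU offload generation can be very slow, so
# remove this block if it is not needed.
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
sample = tokenizer("Hello, this is a test prompt.", return_tensors="pt").to(device)
with torch.no_grad():
    generated = model.generate(**sample, max_new_tokens=64)
print(tokenizer.decode(generated[0], skip_special_tokens=True))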