from transformers import (
    AutoTokenizer,
    Llama4ForConditionalGeneration,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
import datasets
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import huggingface_hub
import os

print("Running train_llama4.py with CPU offloading (version: 2025-04-22 v1)")

# Authenticate to the Hugging Face Hub; the gated Llama 4 weights require an access token.
LLAMA = os.getenv("LLama")
if not LLAMA:
    raise ValueError("LLama token not found. Set it in environment as 'LLama'.")
huggingface_hub.login(token=LLAMA)

MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Llama tokenizers ship without a pad token; add one so padded batches work.
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# 8-bit quantization with fp32 CPU offload: modules that do not fit on the GPU(s)
# stay on the CPU in fp32, and anything beyond that spills to disk via offload_folder.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)

print("Loading model with 8-bit quantization, CPU offload, and automatic device mapping")
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quant_config,
    offload_folder="./offload"
)
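
# Optional sanity check (not required for training): print the device map that
# accelerate chose, to confirm which modules landed on GPU, CPU, or disk.
print(getattr(model, "hf_device_map", {}))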

# Grow the embedding matrix to cover the newly added [PAD] token.
model.resize_token_embeddings(len(tokenizer))

# No manual Accelerator here: Trainer drives Accelerate internally, and calling
# accelerator.prepare() on a quantized model dispatched with device_map="auto"
# is unnecessary for this setup, so the model is handed to Trainer as-is.

dataset = datasets.load_dataset('json', data_files="Bingaman_training_data.json")['train']
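
# The Trainer below needs token IDs rather than raw JSON records. This is a minimal
# preprocessing sketch: the "text" field name and max_length=1024 are assumptions
# about Bingaman_training_data.json and should be adjusted to its actual schema.
from transformers import DataCollatorForLanguageModeling

def tokenize_fn(batch):
    return tokenizer(batch["text"], truncation=True, max_length=1024)

dataset = dataset.map(tokenize_fn, batched=True, remove_columns=dataset.column_names)

# Causal-LM collator: pads each batch and copies input_ids into labels (mlm=False).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)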

# LoRA adapters on the attention query/value projections; the 8-bit base weights stay frozen.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
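
# Optional: report how many parameters the LoRA adapters actually train.
model.print_trainable_parameters()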

# Effective batch size: 2 x 8 = 16 sequences per optimizer step per device.
training_args = {
    "output_dir": "./results",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 8,
    "optim": "adamw_torch",
    "save_steps": 500,
    "logging_steps": 100,
    "learning_rate": 2e-4,
    "bf16": True,  # match the bfloat16 dtype the model was loaded with
    "max_grad_norm": 0.3,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine"
}

# Trainer and TrainingArguments live in transformers, not in the datasets library.
trainer = Trainer(
    model=model,
    args=TrainingArguments(**training_args),
    train_dataset=dataset,
    data_collator=data_collator,
    processing_class=tokenizer
)

trainer.train()

# Saves only the LoRA adapter weights; the quantized base model is not duplicated.
model.save_pretrained("./fine_tuned_model")
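# Also persist the tokenizer (it gained a [PAD] token above) so the adapter can be
# reloaded with a matching vocabulary.
tokenizer.save_pretrained("./fine_tuned_model")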

print("Training completed!")