import os

import torch
from transformers import Trainer, TrainingArguments
from unsloth import FastLanguageModel

# Load the quantized model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="deepseek-ai/DeepSeek-V3-0324",
    dtype=torch.bfloat16,
    load_in_4bit=True,  # or Unsloth's dynamic 2.71-bit quant
    token=os.environ["HF_TOKEN"],
)

# The 4-bit base weights are frozen, so attach LoRA adapters to get
# trainable parameters (the hyperparameters below are illustrative)
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
FastLanguageModel.for_training(model)

# Write the DeepSpeed config *before* creating TrainingArguments,
# since TrainingArguments parses the file on initialization
with open("/app/ds_config.json", "w") as f:
    f.write('''
{
  "fp16": {"enabled": false},
  "bf16": {"enabled": true},
  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {"device": "cpu"},
    "offload_param": {"device": "cpu"}
  },
  "train_batch_size": "auto",
  "gradient_accumulation_steps": "auto"
}
''')

# Training arguments
training_args = TrainingArguments(
    output_dir="/app/checkpoints",
    per_device_train_batch_size=4,  # adjust for A100 40GB/80GB
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # filled into the "auto" DeepSpeed field
    num_train_epochs=2,
    learning_rate=2e-5,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="steps",  # renamed to eval_strategy in newer transformers
    eval_steps=500,
    logging_dir="/app/logs",
    logging_steps=100,
    fp16=False,
    bf16=True,  # bfloat16 for A100
    deepspeed="/app/ds_config.json",
)

# Initialize the trainer (tokenized_dataset is assumed to be prepared
# earlier; see the sketch below)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

# Train
trainer.train()

# Save the fine-tuned weights and tokenizer
model.save_pretrained("/app/fine_tuned_model")
tokenizer.save_pretrained("/app/fine_tuned_model")
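The script references a tokenized_dataset that must exist before the Trainer is built. A minimal sketch of how it might be prepared, assuming a hypothetical JSONL file at /app/data/train.jsonl with a "text" column (swap in your own path, column name, and sequence length):

from datasets import load_dataset

# Hypothetical raw data: one JSON object per line with a "text" field
raw_dataset = load_dataset("json", data_files="/app/data/train.jsonl", split="train")

def tokenize(batch):
    # Pad to a fixed length so the Trainer's default collator can stack
    # the examples; labels mirror input_ids for causal-LM loss
    tokens = tokenizer(
        batch["text"], truncation=True, padding="max_length", max_length=2048
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = raw_dataset.map(
    tokenize, batched=True, remove_columns=raw_dataset.column_names
).train_test_split(test_size=0.05)  # yields the "train" and "test" splits

Because the arguments point at a ZeRO-3 config, the script is meant to be launched with the deepspeed launcher (e.g. deepspeed train.py) rather than plain python, so that DeepSpeed can manage the process group and CPU offload.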