Refactor train.py to use a comprehensive configuration structure from config.yaml, covering model loading, dataset handling, and trainer setup. This update centralizes the model, PEFT, dataset, and training parameters, improving maintainability and flexibility.
commit 611c848
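The `defaults: - _self_` key at the top of the file suggests the config is consumed through Hydra. A minimal sketch of a matching entry point follows; the function name and config path are assumptions, not confirmed by the commit:

# Minimal sketch, assuming train.py loads config.yaml via Hydra
# (suggested by the `defaults: - _self_` key). The entry-point name
# and config location are hypothetical.
import hydra
from omegaconf import DictConfig

@hydra.main(version_base=None, config_path=".", config_name="config")
def main(cfg: DictConfig) -> None:
    # Each attribute path mirrors a section of the YAML shown below.
    print(cfg.model.name)               # "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
    print(cfg.training.args.max_steps)  # 120
    if not cfg.train:                   # training control flag; false by default
        return

if __name__ == "__main__":
    main()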
config.yaml:

defaults:
  - _self_

# Model configuration
model:
  name: "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
  max_seq_length: 2048  # Unsloth auto-supports RoPE scaling internally
  dtype: null  # null for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
  load_in_4bit: true  # Use 4-bit quantization to reduce memory usage

# PEFT configuration
peft:
  r: 64
  lora_alpha: 128
  lora_dropout: 0.05
  bias: "none"
  use_gradient_checkpointing: "unsloth"
  random_state: 3407
  use_rslora: true
  loftq_config: null
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Dataset configuration
dataset:
  validation_split: 0.1  # 10% of the data held out for validation
  seed: 3407  # Random seed for dataset splitting

# Training configuration
training:
  args:
    per_device_train_batch_size: 2
    per_device_eval_batch_size: 2
    gradient_accumulation_steps: 16
    warmup_steps: 100
    max_steps: 120
    learning_rate: 5e-5
    logging_steps: 1
    save_strategy: "steps"
    save_steps: 30
    eval_strategy: "steps"
    eval_steps: 30
    save_total_limit: 2
    optim: "adamw_8bit"
    weight_decay: 0.01
    lr_scheduler_type: "cosine_with_restarts"
    seed: 3407
    output_dir: "outputs"
    gradient_checkpointing: true
    load_best_model_at_end: true
    metric_for_best_model: "eval_loss"
    greater_is_better: false
  sft:
    dataset_num_proc: 2
    packing: false
  data_collator:
    mlm: false
    pad_to_multiple_of: 8

# Output configuration
output:
  dir: "final_model"

# Training control
train: false
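For reference, a hedged sketch of how these sections could map onto the Unsloth and TRL APIs implied by the model name. The dummy dataset and its "text" column are stand-ins (the Space's actual data loading is not part of this config), and the SFTTrainer keywords follow the older TRL signature used in Unsloth notebooks; newer TRL releases move several of them into SFTConfig.

# Hedged sketch: wiring the config sections into Unsloth + TRL.
from datasets import Dataset
from omegaconf import OmegaConf
from transformers import DataCollatorForLanguageModeling, TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

cfg = OmegaConf.load("config.yaml")

# model: section -> 4-bit base model load
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=cfg.model.name,
    max_seq_length=cfg.model.max_seq_length,
    dtype=cfg.model.dtype,  # null -> auto-detect
    load_in_4bit=cfg.model.load_in_4bit,
)

# peft: section -> LoRA adapters; the keys match get_peft_model's kwargs
model = FastLanguageModel.get_peft_model(model, **OmegaConf.to_container(cfg.peft))

# dataset: section -> train/validation split (dummy data as a stand-in)
raw_dataset = Dataset.from_dict({"text": ["example text"] * 100})
splits = raw_dataset.train_test_split(
    test_size=cfg.dataset.validation_split, seed=cfg.dataset.seed
)

# training: section -> HF TrainingArguments plus TRL SFTTrainer
# (eval_strategy in the args requires a recent transformers release)
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=splits["train"],
    eval_dataset=splits["test"],
    dataset_text_field="text",  # matches the dummy dataset; not in config.yaml
    max_seq_length=cfg.model.max_seq_length,
    args=TrainingArguments(**OmegaConf.to_container(cfg.training.args)),
    dataset_num_proc=cfg.training.sft.dataset_num_proc,
    packing=cfg.training.sft.packing,
    data_collator=DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=cfg.training.data_collator.mlm,
        pad_to_multiple_of=cfg.training.data_collator.pad_to_multiple_of,
    ),
)

# train: and output: sections -> run only when the control flag is enabled
if cfg.train:
    trainer.train()
    model.save_pretrained(cfg.output.dir)
    tokenizer.save_pretrained(cfg.output.dir)

Note that with max_steps: 120, eval/save every 30 steps, save_total_limit: 2, and load_best_model_at_end: true, the trainer keeps only the two most recent checkpoints plus the best one by eval_loss, which is what gets written to final_model when the control flag is enabled.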