Refactor train.py to load its full configuration from config.yaml, covering model loading, dataset handling, and trainer setup. This centralizes model, PEFT, dataset, and training parameters, improving maintainability and flexibility.
defaults:
  - _self_

# Model configuration
model:
  name: "unsloth/SmolLM2-135M-Instruct-bnb-4bit"
  max_seq_length: 2048 # Auto supports RoPE scaling internally
  dtype: null # null for auto detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
  load_in_4bit: true # Use 4-bit quantization to reduce memory usage

# PEFT configuration
peft:
  r: 64 # LoRA rank
  lora_alpha: 128 # LoRA scaling factor
  lora_dropout: 0.05
  bias: "none"
  use_gradient_checkpointing: "unsloth"
  random_state: 3407
  use_rslora: true # Rank-stabilized LoRA
  loftq_config: null
  target_modules: # Attention and MLP projection layers to adapt
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Dataset configuration
dataset:
  validation_split: 0.1 # 10% of data held out for validation
  seed: 3407 # Random seed for dataset splitting

# Training configuration
training:
  args: # Keys mirror transformers.TrainingArguments
    per_device_train_batch_size: 2
    per_device_eval_batch_size: 2
    gradient_accumulation_steps: 16 # Effective batch size = 2 * 16 = 32 per device
    warmup_steps: 100
    max_steps: 120
    learning_rate: 5e-5
    logging_steps: 1
    save_strategy: "steps"
    save_steps: 30
    eval_strategy: "steps"
    eval_steps: 30
    save_total_limit: 2
    optim: "adamw_8bit"
    weight_decay: 0.01
    lr_scheduler_type: "cosine_with_restarts"
    seed: 3407
    output_dir: "outputs"
    gradient_checkpointing: true
    load_best_model_at_end: true
    metric_for_best_model: "eval_loss"
    greater_is_better: false
  sft: # SFTTrainer-specific options
    dataset_num_proc: 2
    packing: false
  data_collator:
    mlm: false # Causal LM objective, not masked LM
    pad_to_multiple_of: 8

# Output configuration
output:
  dir: "final_model" # Where the final model/tokenizer are saved

# Training control
train: false # Set to true to actually run training
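
For context on the refactor described in the commit message, a minimal sketch of how train.py might consume this file follows. It assumes Hydra resolves the defaults list and that Unsloth's FastLanguageModel and TRL's SFTTrainer handle model and trainer setup. The dataset source and text-field name are placeholders (neither appears in the config), and the keyword set passed to SFTTrainer assumes a TRL version that still accepts tokenizer, dataset_text_field, and max_seq_length directly, plus a recent transformers release for the eval_strategy argument. This is illustrative only, not the repository's actual train.py.

# Illustrative sketch only -- not the repository's actual train.py.
from unsloth import FastLanguageModel  # Unsloth recommends importing it before transformers/trl

import hydra
from datasets import load_dataset
from omegaconf import DictConfig, OmegaConf
from transformers import DataCollatorForLanguageModeling, TrainingArguments
from trl import SFTTrainer


@hydra.main(config_path=".", config_name="config", version_base=None)
def main(cfg: DictConfig) -> None:
    # Model: load the 4-bit base model through Unsloth.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=cfg.model.name,
        max_seq_length=cfg.model.max_seq_length,
        dtype=cfg.model.dtype,
        load_in_4bit=cfg.model.load_in_4bit,
    )

    # PEFT: attach LoRA adapters using the peft section verbatim.
    model = FastLanguageModel.get_peft_model(
        model, **OmegaConf.to_container(cfg.peft, resolve=True)
    )

    # Dataset: placeholder source, then split off a validation set.
    dataset = load_dataset("user/dataset", split="train")  # placeholder; not specified in config.yaml
    splits = dataset.train_test_split(
        test_size=cfg.dataset.validation_split, seed=cfg.dataset.seed
    )

    # Trainer: TrainingArguments and collator built directly from the config.
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=splits["train"],
        eval_dataset=splits["test"],
        dataset_text_field="text",  # assumed field name
        max_seq_length=cfg.model.max_seq_length,
        dataset_num_proc=cfg.training.sft.dataset_num_proc,
        packing=cfg.training.sft.packing,
        args=TrainingArguments(
            **OmegaConf.to_container(cfg.training.args, resolve=True)
        ),
        data_collator=DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=cfg.training.data_collator.mlm,
            pad_to_multiple_of=cfg.training.data_collator.pad_to_multiple_of,
        ),
    )

    # Training control: the top-level `train` flag gates the actual run.
    if cfg.train:
        trainer.train()
        model.save_pretrained(cfg.output.dir)
        tokenizer.save_pretrained(cfg.output.dir)


if __name__ == "__main__":
    main()

Keeping the keys under training.args aligned with transformers.TrainingArguments is what allows the single dict unpack when building the trainer.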