SystemAdmin123 committed
Commit 06def12 · verified · 1 Parent(s): 0878ef8

Training in progress, step 40

axolotl_config.yaml CHANGED
@@ -1,6 +1,4 @@
-adapter: lora
 base_model: JackFram/llama-68m
-bf16: true
 chat_template: tokenizer_default_fallback_alpaca
 datasets:
 - format: custom
@@ -25,10 +23,6 @@ hub_model_id: SystemAdmin123/test-repo
 hub_strategy: checkpoint
 learning_rate: 0.0002
 logging_steps: 10
-lora_alpha: 256
-lora_dropout: 0.1
-lora_r: 128
-lora_target_linear: true
 max_steps: 2500
 micro_batch_size: 1
 num_epochs: 100
@@ -42,7 +36,6 @@ sequence_len: 2048
 special_tokens:
   pad_token: </s>
 tokenizer_type: LlamaTokenizerFast
-torch_dtype: bf16
 trust_remote_code: true
 val_set_size: 0.1
 wandb_entity: ''
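Note on this change: dropping `adapter: lora` and the `lora_*` hyperparameters switches the run from LoRA adapter training to full fine-tuning of the base model. A minimal sketch of the difference, assuming the `transformers` and `peft` libraries; the `target_modules` list below is an illustrative stand-in for the removed `lora_target_linear: true`, not taken from this repo:

```python
# Sketch: trainable-parameter count with and without the removed LoRA settings.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("JackFram/llama-68m")
full = sum(p.numel() for p in base.parameters() if p.requires_grad)

# Mirrors the deleted keys: lora_r: 128, lora_alpha: 256, lora_dropout: 0.1.
# lora_target_linear: true is approximated by listing the linear projections.
peft_model = get_peft_model(base, LoraConfig(
    r=128,
    lora_alpha=256,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
))
lora = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)

print(f"full fine-tune (new config): {full:,} trainable params")
print(f"LoRA r=128 (old config):     {lora:,} trainable params")
```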
config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_attn_implementation_autoset": true,
   "_name_or_path": "JackFram/llama-68m",
   "architectures": [
     "LlamaForCausalLM"
@@ -25,7 +24,7 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
   "use_cache": false,
   "vocab_size": 32000
ds_config.yml CHANGED
@@ -1 +1 @@
-{"train_micro_batch_size_per_gpu": 1, "gradient_accumulation_steps": 32, "steps_per_print": 200, "bf16": {"enabled": true}, "zero_optimization": {"stage": 2, "allgather_partitions": true, "reduce_scatter": true, "overlap_comm": true, "contiguous_gradients": true, "reduce_bucket_size": 5000000, "allgather_bucket_size": 5000000}, "optimizer": {"type": "AdamW", "params": {"lr": 0.0002, "betas": [0.9, 0.999], "eps": 1e-08, "weight_decay": 0.01}}, "scheduler": {"type": "WarmupCosineSchedule", "params": {"warmup_min_lr": 0, "warmup_max_lr": 0.0002, "warmup_num_steps": 125}}}
+{"train_micro_batch_size_per_gpu": 1, "gradient_accumulation_steps": 32, "steps_per_print": 200, "bf16": {"enabled": false}, "zero_optimization": {"stage": 2, "allgather_partitions": true, "reduce_scatter": true, "overlap_comm": true, "contiguous_gradients": true, "reduce_bucket_size": 5000000, "allgather_bucket_size": 5000000}, "optimizer": {"type": "torch.optim.AdamW", "params": {"lr": 0.0002, "betas": [0.9, 0.999], "eps": 1e-08, "weight_decay": 0.01}}, "scheduler": {"type": "WarmupCosineSchedule", "params": {"warmup_min_lr": 0, "warmup_max_lr": 0.0002, "warmup_num_steps": 125}}}
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1985749813398ab678fa16f868bbbedc4997e1118b6ce01f98888b06f4bc92c0
-size 4140280
+oid sha256:bb869319ab00023388d1e988acef0010027b3678790ccae9050ce5b80348b1f4
+size 136062744
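The size jump lines up with the config changes above: the old 4,140,280-byte file is adapter-checkpoint sized, while the new one matches full model weights stored in bf16. A back-of-the-envelope check (the arithmetic is an inference from the recorded dtype, not stated in the commit):

```python
# ~2 bytes per parameter in bf16; the small safetensors header is ignored.
new_size = 136_062_744
print(f"~{new_size / 2 / 1e6:.0f}M parameters")  # ≈ 68M, i.e. llama-68m
# The previous 4,140,280-byte file is consistent with LoRA adapter weights only.
```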
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:485478fcdd9047f9c99956450af35c4ddd14fedec639aa49f14d14f127fdac68
+oid sha256:5414fad3ccf622c5fff1f84e82069b56b14de9a52dbbb9ddf1d853ff6aff2a29
 size 6840
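`training_args.bin` is the pickled `TrainingArguments` object that `transformers.Trainer` writes alongside checkpoints; only its hash changed here, not its size. A sketch of inspecting it, assuming a recent PyTorch where `weights_only=False` must be passed explicitly because the file is a pickle rather than tensors:

```python
import torch

# Not a weights file despite the .bin suffix: it unpickles to TrainingArguments.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.max_steps)  # should echo the yaml: 0.0002, 2500
```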