Training in progress, step 40
- adapter_config.json +37 -0
- adapter_model.safetensors +3 -0
- axolotl_config.yaml +17 -13
- config.json +19 -18
- ds_config.yml +1 -0
- special_tokens_map.json +5 -5
- tokenizer.json +2 -2
- tokenizer_config.json +7 -95
- training_args.bin +1 -1
adapter_config.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "JackFram/llama-68m",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "o_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
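This adds a standard PEFT LoRA configuration: rank 128 with alpha 256 and dropout 0.1, targeting every linear projection (attention q/k/v/o plus the MLP gate/up/down) of the base model. A minimal loading sketch, assuming the adapter files are served from the hub_model_id used elsewhere in this commit:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the frozen base model, then attach the LoRA adapter on top.
base = AutoModelForCausalLM.from_pretrained("JackFram/llama-68m")
model = PeftModel.from_pretrained(base, "SystemAdmin123/test-repo")  # adapter repo (assumed)
model.eval()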
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3879c6f02d411a9060a269d073d0022e53fb5cd9351db19590d4ee8aeed425aa
+size 18091656
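The 18 MB pointer is consistent with the adapter config above if the A/B factors are stored in float32 (4 bytes per parameter). A back-of-the-envelope check, a sketch using the llama-68m dimensions from config.json below:

# Expected adapter size for r=128 LoRA over 7 linear projections in 2 layers.
hidden, inter, layers, r = 768, 3072, 2, 128

def lora_params(d_in, d_out, r):
    # Each adapted Linear gains A (r x d_in) and B (d_out x r).
    return r * d_in + d_out * r

per_layer = (
    4 * lora_params(hidden, hidden, r)   # q_proj, k_proj, v_proj, o_proj
    + 2 * lora_params(hidden, inter, r)  # gate_proj, up_proj
    + lora_params(inter, hidden, r)      # down_proj
)
total = layers * per_layer               # 4,521,984 parameters
print(total * 4)                         # 18,087,936 bytes; + safetensors header ~= 18,091,656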
axolotl_config.yaml
CHANGED
@@ -1,5 +1,5 @@
-
-
+adapter: lora
+base_model: JackFram/llama-68m
 bf16: true
 chat_template: tokenizer_default_fallback_alpaca
 datasets:
@@ -13,37 +13,41 @@ datasets:
   no_input_format: '{instruction}'
   system_format: '{system}'
   system_prompt: ''
-
+deepspeed_config: /root/.sn56/axolotl/tmp/test-repo/ds_config.json
 eval_sample_packing: false
 eval_steps: 200
 flash_attention: true
-gpu_memory_limit:
+gpu_memory_limit: 79GiB
+gradient_accumulation_steps: 32
+gradient_checkpointing: true
 group_by_length: true
 hub_model_id: SystemAdmin123/test-repo
 hub_strategy: checkpoint
 learning_rate: 0.0002
 logging_steps: 10
-
+lora_alpha: 256
+lora_dropout: 0.1
+lora_r: 128
+lora_target_linear: true
 max_steps: 2500
-micro_batch_size:
-model_type: AutoModelForCausalLM
+micro_batch_size: 1
 num_epochs: 100
-
-output_dir: /root/.sn56/axolotl/outputs/test-repo
+output_dir: /root/.sn56/axolotl/tmp/test-repo
 pad_to_sequence_len: true
 resize_token_embeddings_to_32x: false
 sample_packing: false
-save_steps:
-save_total_limit:
+save_steps: 40
+save_total_limit: 2
 sequence_len: 2048
+special_tokens:
+  pad_token: </s>
 tokenizer_type: LlamaTokenizerFast
 torch_dtype: bf16
 trust_remote_code: true
 val_set_size: 0.1
 wandb_entity: ''
 wandb_mode: online
-wandb_name:
+wandb_name: JackFram/llama-68m-argilla/databricks-dolly-15k-curated-en
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: default
-warmup_ratio: 0.05
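Note how the new batch settings compose: micro_batch_size 1 with gradient_accumulation_steps 32 means each optimizer step consumes 32 sequences per GPU, and max_steps 2500 caps training long before num_epochs 100 would. A small sketch deriving this from the config (assumes PyYAML and a local copy saved as axolotl_config.yaml):

import yaml

with open("axolotl_config.yaml") as f:  # assumed local copy of the config above
    cfg = yaml.safe_load(f)

# Sequences consumed per optimizer step, per GPU; multiply by world size
# for the global batch.
per_gpu = cfg["micro_batch_size"] * cfg["gradient_accumulation_steps"]
print(per_gpu)  # 1 * 32 = 32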
config.json
CHANGED
@@ -1,31 +1,32 @@
 {
-  "
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "JackFram/llama-68m",
   "architectures": [
-    "
+    "LlamaForCausalLM"
   ],
+  "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id":
-  "
-  "
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size":
+  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size":
-  "max_position_embeddings":
-  "
-  "
+  "intermediate_size": 3072,
+  "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 12,
   "num_hidden_layers": 2,
-  "num_key_value_heads":
-  "
-  "
-  "
-  "rms_norm_eps": 1e-05,
+  "num_key_value_heads": 12,
+  "pad_token_id": 1,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 10000.0,
-  "sliding_window": 2047,
   "tie_word_embeddings": false,
-  "torch_dtype": "
+  "torch_dtype": "float32",
   "transformers_version": "4.48.1",
   "use_cache": false,
-  "vocab_size":
+  "vocab_size": 32000
 }
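The restored values pin down the architecture: 2 layers, hidden size 768, 12 heads of dimension 64, and untied embeddings over a 32000-token vocabulary, which is exactly where the "68m" in the model name comes from. A quick count, ignoring the small norm weights:

# Approximate parameter count for JackFram/llama-68m from this config.
vocab, hidden, inter, layers = 32000, 768, 3072, 2

embed = vocab * hidden        # input embeddings
lm_head = vocab * hidden      # separate output head (tie_word_embeddings: false)
attn = 4 * hidden * hidden    # q/k/v/o projections, no biases
mlp = 3 * hidden * inter      # gate/up/down projections
total = embed + lm_head + layers * (attn + mlp)
print(f"{total / 1e6:.1f}M")  # ~68.0M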
ds_config.yml
ADDED
@@ -0,0 +1 @@
+{"train_micro_batch_size_per_gpu": 1, "gradient_accumulation_steps": 32, "steps_per_print": 200, "bf16": {"enabled": true}, "zero_optimization": {"stage": 2, "allgather_partitions": true, "reduce_scatter": true, "overlap_comm": true, "contiguous_gradients": true, "reduce_bucket_size": 5000000, "allgather_bucket_size": 5000000}, "optimizer": {"type": "AdamW", "params": {"lr": 0.0002, "betas": [0.9, 0.999], "eps": 1e-08, "weight_decay": 0.01}}, "scheduler": {"type": "WarmupCosineSchedule", "params": {"warmup_min_lr": 0, "warmup_max_lr": 0.0002, "warmup_num_steps": 125}}}
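The DeepSpeed payload mirrors the axolotl config: same micro batch size and accumulation steps, AdamW at lr 2e-4, and warmup_num_steps 125, which equals the removed warmup_ratio 0.05 times max_steps 2500. A sketch of that cross-check (assumes a local copy of this JSON saved as ds_config.json, the filename the axolotl config points to):

import json

with open("ds_config.json") as f:  # assumed local copy of the one-line JSON above
    ds = json.load(f)

assert ds["train_micro_batch_size_per_gpu"] == 1
assert ds["gradient_accumulation_steps"] == 32
assert ds["optimizer"]["params"]["lr"] == 2e-4  # matches learning_rate
assert ds["scheduler"]["params"]["warmup_num_steps"] == int(0.05 * 2500)  # old warmup_ratio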
special_tokens_map.json
CHANGED
@@ -2,19 +2,19 @@
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
-    "normalized":
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "
+    "content": "</s>",
     "lstrip": false,
-    "normalized":
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -23,7 +23,7 @@
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
-    "normalized":
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   }
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f1a70193a38a63cf5d5f51f6b2b989eb77af06336171584533c4c6012fc06894
+size 3619014
tokenizer_config.json
CHANGED
@@ -6,7 +6,7 @@
     "0": {
      "content": "<unk>",
       "lstrip": false,
-      "normalized":
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -14,7 +14,7 @@
     "1": {
       "content": "<s>",
       "lstrip": false,
-      "normalized":
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -23,109 +23,21 @@
       "content": "</s>",
       "lstrip": false,
       "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": false
-    },
-    "32000": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "32001": {
-      "content": "<|assistant|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32002": {
-      "content": "<|placeholder1|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32003": {
-      "content": "<|placeholder2|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32004": {
-      "content": "<|placeholder3|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32005": {
-      "content": "<|placeholder4|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32006": {
-      "content": "<|system|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32007": {
-      "content": "<|end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32008": {
-      "content": "<|placeholder5|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32009": {
-      "content": "<|placeholder6|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "32010": {
-      "content": "<|user|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "
+  "eos_token": "</s>",
   "extra_special_tokens": {},
   "legacy": true,
-  "model_max_length":
-  "pad_token": "
-  "padding_side": "left",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
   "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false,
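The restored chat_template is the plain Alpaca instruction/response format, matching chat_template: tokenizer_default_fallback_alpaca in the axolotl config. A quick render sketch, assuming the tokenizer files are pulled from the training repo:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("SystemAdmin123/test-repo")  # assumed repo
text = tok.apply_chat_template(
    [{"role": "user", "content": "Name three primes."},
     {"role": "assistant", "content": "2, 3, 5"}],
    tokenize=False,
)
print(text)
# ### Instruction: Name three primes.
#
# ### Response: 2, 3, 5</s>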
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ab35bb512bd0815bfb48651f4d2ed30517d244444ffa68a91ce2ff049faa81b0
 size 6840