SystemAdmin123 committed · commit 57779b1 · verified · 1 parent: 57e4656

Training in progress, step 40

adapter_config.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "JackFram/llama-68m",
+ "bias": "none",
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 256,
+ "lora_bias": false,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "o_proj",
+ "v_proj",
+ "q_proj",
+ "up_proj",
+ "down_proj",
+ "k_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
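For reference, a minimal sketch of how an adapter saved with this config could be loaded; it assumes this repo id (the hub_model_id from axolotl_config.yaml below) and the standard peft/transformers loading APIs, not the exact code used in training:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

adapter_id = "SystemAdmin123/test-repo"  # hub_model_id from the training config
peft_config = PeftConfig.from_pretrained(adapter_id)
base = AutoModelForCausalLM.from_pretrained(peft_config.base_model_name_or_path)  # JackFram/llama-68m
model = PeftModel.from_pretrained(base, adapter_id)  # attaches the r=128, alpha=256 LoRA weights
tokenizer = AutoTokenizer.from_pretrained(adapter_id)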
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3879c6f02d411a9060a269d073d0022e53fb5cd9351db19590d4ee8aeed425aa
+ size 18091656
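As a rough, assumption-laden sanity check (not part of the commit), the pointer's size is consistent with float32 LoRA weights of rank 128 over the seven target modules of both llama-68m layers, using the hidden sizes from config.json further down in this commit:

r, hidden, inter, layers = 128, 768, 3072, 2
attn = 4 * 2 * r * hidden                                       # q/k/v/o_proj: A (r x 768) + B (768 x r) each
mlp = 2 * (r * hidden + inter * r) + (r * inter + hidden * r)   # gate/up_proj + down_proj
params = layers * (attn + mlp)                                  # 4,521,984 LoRA parameters
print(params * 4)                                               # 18,087,936 bytes in float32, close to size 18091656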
axolotl_config.yaml CHANGED
@@ -1,5 +1,5 @@
- base_model: Xenova/tiny-random-Phi3ForCausalLM
- batch_size: 32
+ adapter: lora
+ base_model: JackFram/llama-68m
  bf16: true
  chat_template: tokenizer_default_fallback_alpaca
  datasets:
@@ -13,37 +13,41 @@ datasets:
  no_input_format: '{instruction}'
  system_format: '{system}'
  system_prompt: ''
- device_map: auto
+ deepspeed_config: /root/.sn56/axolotl/tmp/test-repo/ds_config.json
  eval_sample_packing: false
  eval_steps: 200
  flash_attention: true
- gpu_memory_limit: 80GiB
+ gpu_memory_limit: 79GiB
+ gradient_accumulation_steps: 32
+ gradient_checkpointing: true
  group_by_length: true
  hub_model_id: SystemAdmin123/test-repo
  hub_strategy: checkpoint
  learning_rate: 0.0002
  logging_steps: 10
- lr_scheduler: cosine
+ lora_alpha: 256
+ lora_dropout: 0.1
+ lora_r: 128
+ lora_target_linear: true
  max_steps: 2500
- micro_batch_size: 4
- model_type: AutoModelForCausalLM
+ micro_batch_size: 1
  num_epochs: 100
- optimizer: adamw_bnb_8bit
- output_dir: /root/.sn56/axolotl/outputs/test-repo
+ output_dir: /root/.sn56/axolotl/tmp/test-repo
  pad_to_sequence_len: true
  resize_token_embeddings_to_32x: false
  sample_packing: false
- save_steps: 400
- save_total_limit: 1
+ save_steps: 40
+ save_total_limit: 2
  sequence_len: 2048
+ special_tokens:
+   pad_token: </s>
  tokenizer_type: LlamaTokenizerFast
  torch_dtype: bf16
  trust_remote_code: true
  val_set_size: 0.1
  wandb_entity: ''
  wandb_mode: online
- wandb_name: Xenova/tiny-random-Phi3ForCausalLM-argilla/databricks-dolly-15k-curated-en
+ wandb_name: JackFram/llama-68m-argilla/databricks-dolly-15k-curated-en
  wandb_project: Gradients-On-Demand
  wandb_run: your_name
  wandb_runid: default
- warmup_ratio: 0.05
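A small worked check on the new schedule, assuming a single GPU since the world size is not recorded here: micro_batch_size 1 with gradient_accumulation_steps 32 keeps the effective batch at 32 sequences per optimizer step, the same as the old batch_size, and save_steps 40 is why this "step 40" commit carries the first checkpoint:

micro_batch_size, gradient_accumulation_steps = 1, 32
effective_batch = micro_batch_size * gradient_accumulation_steps  # 32, same as the old batch_size
save_steps, current_step = 40, 40
print(effective_batch, current_step % save_steps == 0)            # 32 True -> first checkpoint at step 40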
 
config.json CHANGED
@@ -1,31 +1,32 @@
  {
- "_name_or_path": "Xenova/tiny-random-Phi3ForCausalLM",
+ "_attn_implementation_autoset": true,
+ "_name_or_path": "JackFram/llama-68m",
  "architectures": [
- "Phi3ForCausalLM"
+ "LlamaForCausalLM"
  ],
+ "attention_bias": false,
  "attention_dropout": 0.0,
- "bos_token_id": 1,
- "embd_pdrop": 0.0,
- "eos_token_id": 32000,
+ "bos_token_id": 0,
+ "eos_token_id": 2,
+ "head_dim": 64,
  "hidden_act": "silu",
- "hidden_size": 32,
+ "hidden_size": 768,
  "initializer_range": 0.02,
- "intermediate_size": 64,
- "max_position_embeddings": 4096,
- "model_type": "phi3",
- "num_attention_heads": 4,
+ "intermediate_size": 3072,
+ "max_position_embeddings": 2048,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 12,
  "num_hidden_layers": 2,
- "num_key_value_heads": 4,
- "original_max_position_embeddings": 4096,
- "pad_token_id": 32000,
- "resid_pdrop": 0.0,
- "rms_norm_eps": 1e-05,
+ "num_key_value_heads": 12,
+ "pad_token_id": 1,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
- "sliding_window": 2047,
  "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
+ "torch_dtype": "float32",
  "transformers_version": "4.48.1",
  "use_cache": false,
- "vocab_size": 32011
+ "vocab_size": 32000
  }
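The new values are consistent with the 68M parameters the base model's name suggests; a rough count from the config above, ignoring the small RMSNorm terms and counting the untied LM head separately:

vocab, hidden, inter, layers = 32000, 768, 3072, 2
embed = vocab * hidden            # input embeddings
lm_head = hidden * vocab          # untied output head (tie_word_embeddings: false)
attn = 4 * hidden * hidden        # q/k/v/o projections (12 heads x head_dim 64 = 768)
mlp = 3 * hidden * inter          # gate, up and down projections
print(embed + lm_head + layers * (attn + mlp))   # ~68.0M parameters, RMSNorm weights ignored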
ds_config.yml ADDED
@@ -0,0 +1 @@
+ {"train_micro_batch_size_per_gpu": 1, "gradient_accumulation_steps": 32, "steps_per_print": 200, "bf16": {"enabled": true}, "zero_optimization": {"stage": 2, "allgather_partitions": true, "reduce_scatter": true, "overlap_comm": true, "contiguous_gradients": true, "reduce_bucket_size": 5000000, "allgather_bucket_size": 5000000}, "optimizer": {"type": "AdamW", "params": {"lr": 0.0002, "betas": [0.9, 0.999], "eps": 1e-08, "weight_decay": 0.01}}, "scheduler": {"type": "WarmupCosineSchedule", "params": {"warmup_min_lr": 0, "warmup_max_lr": 0.0002, "warmup_num_steps": 125}}}
special_tokens_map.json CHANGED
@@ -2,19 +2,19 @@
  "bos_token": {
  "content": "<s>",
  "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
  "eos_token": {
- "content": "<|endoftext|>",
+ "content": "</s>",
  "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
  "pad_token": {
- "content": "<|endoftext|>",
+ "content": "</s>",
  "lstrip": false,
  "normalized": false,
  "rstrip": false,
@@ -23,7 +23,7 @@
  "unk_token": {
  "content": "<unk>",
  "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b013fe7282b7984bc14b1c64c2a70dd06b652a969810fbba6217f4ac70339f44
- size 3621089
+ oid sha256:f1a70193a38a63cf5d5f51f6b2b989eb77af06336171584533c4c6012fc06894
+ size 3619014
tokenizer_config.json CHANGED
@@ -6,7 +6,7 @@
  "0": {
  "content": "<unk>",
  "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
@@ -14,7 +14,7 @@
  "1": {
  "content": "<s>",
  "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false,
  "special": true
@@ -23,109 +23,21 @@
  "content": "</s>",
  "lstrip": false,
  "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": false
- },
- "32000": {
- "content": "<|endoftext|>",
- "lstrip": false,
- "normalized": false,
  "rstrip": false,
  "single_word": false,
  "special": true
- },
- "32001": {
- "content": "<|assistant|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32002": {
- "content": "<|placeholder1|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32003": {
- "content": "<|placeholder2|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32004": {
- "content": "<|placeholder3|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32005": {
- "content": "<|placeholder4|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32006": {
- "content": "<|system|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32007": {
- "content": "<|end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32008": {
- "content": "<|placeholder5|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32009": {
- "content": "<|placeholder6|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
- },
- "32010": {
- "content": "<|user|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": true,
- "single_word": false,
- "special": true
  }
  },
  "bos_token": "<s>",
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
  "clean_up_tokenization_spaces": false,
- "eos_token": "<|endoftext|>",
+ "eos_token": "</s>",
  "extra_special_tokens": {},
  "legacy": true,
- "model_max_length": 4096,
- "pad_token": "<|endoftext|>",
- "padding_side": "left",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
  "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8ffd459aa83253f73e371e129574dd0434ee79c2c18eb103b0e4428a34062eb2
+ oid sha256:ab35bb512bd0815bfb48651f4d2ed30517d244444ffa68a91ce2ff049faa81b0
  size 6840