SystemAdmin123 committed on
Commit
548bfa1
·
verified ·
1 Parent(s): 311c564

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: JackFram/llama-68m
3
  library_name: peft
4
  ---
5
 
 
1
  ---
2
+ base_model: peft-internal-testing/tiny-dummy-qwen2
3
  library_name: peft
4
  ---
5
 
last-checkpoint/adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "JackFram/llama-68m",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_proj",
27
- "o_proj",
28
  "v_proj",
 
 
29
  "q_proj",
30
- "up_proj",
31
  "down_proj",
32
- "k_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "peft-internal-testing/tiny-dummy-qwen2",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "v_proj",
27
+ "o_proj",
28
+ "gate_proj",
29
  "q_proj",
 
30
  "down_proj",
31
+ "k_proj",
32
+ "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf6bc89ed515c11edca157585cc4d08f3b032c0ffd4314aa79333ca1f3c8d512
3
- size 18091656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:159277feec320a7592284f9e3696651e378c75cb98a9616d5fc9a41efc0eec40
3
+ size 183784
last-checkpoint/added_tokens.json CHANGED
@@ -1,4 +1,5 @@
1
  {
2
- "<|im_end|>": 32000,
3
- "<|im_start|>": 32001
 
4
  }
 
1
  {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
  }
last-checkpoint/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b12933aa23a0c560e75cf26a59069e1e7ada485e87376d133ef8818dfabd9bd9
3
- size 14710155092
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c71a9e87aa3c595eecb7f30d889af53933360e76b6456ae393643c7d90dcacb
3
+ size 236760
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f20d155a14d34cd1bb6d04e5de90f3224906e1758821edd752a8f1a9085a2db
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99ae411404197432466fcb959effa7956b7aabac6b0ee3018ce67d44282b87b1
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad18e870a176ca75a54f6620f83de92bdfd5a91302744d90bff8e5feae2fe0c5
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e0c13bbed523a6d7bec142d7a3836e9629b2dc23935ee4a5086689a05f762e6
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9009a52be47b75834407dc5e146ed5360e6f23a35bff27bab34ef6fb47df1661
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29316b978407a35ab6f860f3a2bcf442e67f1f9bd92ef1016961e6d3aa0c3d14
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb494a08fd57e7c6f63f06826c872164986e81b271996be0496671f713bdcc3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e9c496f59967ab95befd85ccf8b9fef5104a06d33cdbe2714ed501882c6167
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cd11d413bc67bf01de9a1a006e9e7655be307353028b25f5b3c299e5b6b7a44
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcaaab9b4e9d97f524192da2cde7b8ea63f0956124955e5031658e7310a4fbcd
3
  size 1064
last-checkpoint/special_tokens_map.json CHANGED
@@ -1,27 +1,17 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
  "eos_token": {
10
- "content": "<|im_end|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<unk>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "unk_token": {
24
- "content": "<unk>",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
 
 
 
6
  "eos_token": {
7
+ "content": "<|endoftext|>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
  "pad_token": {
14
+ "content": "<|endoftext|>",
 
 
 
 
 
 
 
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
last-checkpoint/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04222cd76979c181cd3f72c3bf6982fe2a09d9f4b8f23d82902efde18f1d0668
3
- size 3506125
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
3
+ size 11418266
last-checkpoint/tokenizer_config.json CHANGED
@@ -1,64 +1,45 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
- "0": {
7
- "content": "<unk>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
13
  },
14
- "1": {
15
- "content": "<s>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": true
21
- },
22
- "2": {
23
- "content": "</s>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
  },
30
- "32000": {
31
  "content": "<|im_end|>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
35
  "single_word": false,
36
  "special": true
37
- },
38
- "32001": {
39
- "content": "<|im_start|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": true
45
  }
46
  },
47
- "additional_special_tokens": [],
48
- "bos_token": "<s>",
 
 
 
49
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
50
  "clean_up_tokenization_spaces": false,
51
- "eos_token": "<|im_end|>",
 
52
  "extra_special_tokens": {},
53
- "legacy": true,
54
  "model_max_length": 32768,
55
- "pad_token": "<unk>",
56
- "padding_side": "right",
57
- "sp_model_kwargs": {},
58
- "spaces_between_special_tokens": false,
59
- "tokenizer_class": "LlamaTokenizer",
60
- "trust_remote_code": false,
61
- "unk_token": "<unk>",
62
- "use_default_system_prompt": true,
63
  "use_fast": true
64
  }
 
1
  {
2
+ "add_prefix_space": false,
 
 
3
  "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "151644": {
13
+ "content": "<|im_start|>",
 
 
 
 
 
 
 
 
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "151645": {
21
  "content": "<|im_end|>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
 
 
 
 
 
 
 
 
27
  }
28
  },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|endoftext|>",
37
+ "errors": "replace",
38
  "extra_special_tokens": {},
 
39
  "model_max_length": 32768,
40
+ "pad_token": "<|endoftext|>",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null,
 
 
 
 
44
  "use_fast": true
45
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,175 +1,71 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9047619047619047,
5
- "eval_steps": 200,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.009523809523809525,
13
- "eval_loss": 1.873080849647522,
14
- "eval_runtime": 27.7442,
15
- "eval_samples_per_second": 54.101,
16
- "eval_steps_per_second": 6.776,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 0.09523809523809523,
21
- "grad_norm": 32.75,
22
- "learning_rate": 6.666666666666667e-05,
23
- "loss": 2.6922,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 0.19047619047619047,
28
- "grad_norm": 628.0,
29
- "learning_rate": 0.00013333333333333334,
30
- "loss": 3.47,
31
- "step": 20
32
- },
33
- {
34
- "epoch": 0.2857142857142857,
35
- "grad_norm": 141.0,
36
- "learning_rate": 0.0002,
37
- "loss": 9.2162,
38
- "step": 30
39
- },
40
- {
41
- "epoch": 0.38095238095238093,
42
- "grad_norm": 30.625,
43
- "learning_rate": 0.00019984815164333163,
44
- "loss": 8.4222,
45
- "step": 40
46
- },
47
- {
48
- "epoch": 0.47619047619047616,
49
- "grad_norm": 44.75,
50
- "learning_rate": 0.00019939306773179497,
51
- "loss": 8.4529,
52
- "step": 50
53
- },
54
- {
55
- "epoch": 0.5714285714285714,
56
- "grad_norm": 29.375,
57
- "learning_rate": 0.00019863613034027224,
58
- "loss": 8.0506,
59
- "step": 60
60
- },
61
- {
62
- "epoch": 0.6666666666666666,
63
- "grad_norm": 25.375,
64
- "learning_rate": 0.00019757963826274357,
65
- "loss": 8.6481,
66
- "step": 70
67
- },
68
- {
69
- "epoch": 0.7619047619047619,
70
- "grad_norm": 26.75,
71
- "learning_rate": 0.00019622680003092503,
72
- "loss": 7.9906,
73
- "step": 80
74
- },
75
- {
76
- "epoch": 0.8571428571428571,
77
- "grad_norm": 33.25,
78
- "learning_rate": 0.00019458172417006347,
79
- "loss": 8.4843,
80
- "step": 90
81
- },
82
- {
83
- "epoch": 0.9523809523809523,
84
- "grad_norm": 23.5,
85
- "learning_rate": 0.00019264940672148018,
86
- "loss": 7.4575,
87
- "step": 100
88
- },
89
- {
90
- "epoch": 1.0476190476190477,
91
- "grad_norm": 16.25,
92
- "learning_rate": 0.00019043571606975777,
93
- "loss": 7.4497,
94
- "step": 110
95
- },
96
- {
97
- "epoch": 1.1428571428571428,
98
- "grad_norm": 19.75,
99
- "learning_rate": 0.0001879473751206489,
100
- "loss": 7.3829,
101
- "step": 120
102
- },
103
- {
104
- "epoch": 1.2380952380952381,
105
- "grad_norm": 13.6875,
106
- "learning_rate": 0.00018519194088383273,
107
- "loss": 7.3748,
108
- "step": 130
109
- },
110
- {
111
- "epoch": 1.3333333333333333,
112
- "grad_norm": 25.0,
113
- "learning_rate": 0.0001821777815225245,
114
- "loss": 7.3801,
115
- "step": 140
116
  },
117
  {
118
- "epoch": 1.4285714285714286,
119
- "grad_norm": 13.0625,
120
- "learning_rate": 0.00017891405093963938,
121
- "loss": 7.2868,
122
- "step": 150
123
- },
124
- {
125
- "epoch": 1.5238095238095237,
126
- "grad_norm": 52.0,
127
- "learning_rate": 0.00017541066097768963,
128
- "loss": 7.2058,
129
- "step": 160
130
- },
131
- {
132
- "epoch": 1.619047619047619,
133
- "grad_norm": 98.5,
134
- "learning_rate": 0.00017167825131684513,
135
- "loss": 7.2541,
136
- "step": 170
137
- },
138
- {
139
- "epoch": 1.7142857142857144,
140
- "grad_norm": 10.8125,
141
- "learning_rate": 0.00016772815716257412,
142
- "loss": 7.1795,
143
- "step": 180
144
  },
145
  {
146
- "epoch": 1.8095238095238095,
147
- "grad_norm": 18.125,
148
- "learning_rate": 0.00016357237482099684,
149
- "loss": 7.0485,
150
- "step": 190
151
  },
152
  {
153
- "epoch": 1.9047619047619047,
154
- "grad_norm": 9.4375,
155
- "learning_rate": 0.00015922352526649803,
156
- "loss": 6.9094,
157
- "step": 200
 
158
  },
159
  {
160
- "epoch": 1.9047619047619047,
161
- "eval_loss": 6.993889808654785,
162
- "eval_runtime": 27.036,
163
- "eval_samples_per_second": 55.519,
164
- "eval_steps_per_second": 6.954,
165
- "step": 200
166
  }
167
  ],
168
  "logging_steps": 10,
169
- "max_steps": 600,
170
  "num_input_tokens_seen": 0,
171
  "num_train_epochs": 6,
172
- "save_steps": 200,
173
  "stateful_callbacks": {
174
  "TrainerControl": {
175
  "args": {
@@ -182,8 +78,8 @@
182
  "attributes": {}
183
  }
184
  },
185
- "total_flos": 1.3980142170800128e+17,
186
- "train_batch_size": 2,
187
  "trial_name": null,
188
  "trial_params": null
189
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3793103448275863,
5
+ "eval_steps": 16,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.034482758620689655,
13
+ "eval_loss": 11.93175220489502,
14
+ "eval_runtime": 0.8699,
15
+ "eval_samples_per_second": 173.589,
16
+ "eval_steps_per_second": 6.898,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 0.3448275862068966,
21
+ "grad_norm": 0.04096066579222679,
22
+ "learning_rate": 9.995728791936504e-05,
23
+ "loss": 11.9309,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 0.5517241379310345,
28
+ "eval_loss": 11.927915573120117,
29
+ "eval_runtime": 0.9134,
30
+ "eval_samples_per_second": 165.318,
31
+ "eval_steps_per_second": 6.569,
32
+ "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
+ "epoch": 0.6896551724137931,
36
+ "grad_norm": 0.10107108950614929,
37
+ "learning_rate": 9.847001329696653e-05,
38
+ "loss": 11.9282,
39
+ "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  },
41
  {
42
+ "epoch": 1.0344827586206897,
43
+ "grad_norm": 0.04157465696334839,
44
+ "learning_rate": 9.491954909459895e-05,
45
+ "loss": 11.9236,
46
+ "step": 30
47
  },
48
  {
49
+ "epoch": 1.103448275862069,
50
+ "eval_loss": 11.921222686767578,
51
+ "eval_runtime": 0.939,
52
+ "eval_samples_per_second": 160.809,
53
+ "eval_steps_per_second": 6.39,
54
+ "step": 32
55
  },
56
  {
57
+ "epoch": 1.3793103448275863,
58
+ "grad_norm": 0.025179261341691017,
59
+ "learning_rate": 8.945702546981969e-05,
60
+ "loss": 11.9214,
61
+ "step": 40
 
62
  }
63
  ],
64
  "logging_steps": 10,
65
+ "max_steps": 160,
66
  "num_input_tokens_seen": 0,
67
  "num_train_epochs": 6,
68
+ "save_steps": 40,
69
  "stateful_callbacks": {
70
  "TrainerControl": {
71
  "args": {
 
78
  "attributes": {}
79
  }
80
  },
81
+ "total_flos": 647500922880.0,
82
+ "train_batch_size": 7,
83
  "trial_name": null,
84
  "trial_params": null
85
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3eb8b3b2a8acd873a0906e2cee7771ee216bb2230a5edc2d984d58604ec971
3
- size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37476a182ec272b4a6a6254cda6d0ce55ff7d3844823bf9121f7eccf202f4723
3
+ size 6648
last-checkpoint/vocab.json CHANGED
The diff for this file is too large to render. See raw diff