SystemAdmin123 committed on
Commit
f9a47e6
·
verified ·
1 Parent(s): ed01ca0

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae2287b45658d1cae69b4bfe25e777f022036e8d980148fa7635d454487588a9
3
  size 2066752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a257c5b2e34eda80e3d84f1a8cc4247ba163d63b057609915a20d5c03487fe
3
  size 2066752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:556e1830f3d5b4cc26513099e09450196ba4a64ae97c75af17431421d368e625
3
  size 2162798
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54a820f98fa07f24b707a5b13ad41e602ccc916761791610cd651856882df16
3
  size 2162798
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4faa065a55913b65f4b0549e4d93d87e8865c0f6ec216f40a3de4d251a15322a
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4651e32e118f1ea1a8e26dfbbe64298593e12e6a71bcd36cb77f04f86d3f86d
3
+ size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10ae8864af9d168bc9a94e5c5625da874d35a133304d7d7414b10c80148467d4
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c1eba909fbb51daca773a25c075f182b4096aff21c9b4ff19dbada2080ac99
3
+ size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ece1ed46b8aa193251efdc1d8393b3bb872b53f6ba93c31cc3efc627b34d74be
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28adb9f06e220aefdc723ea4380a84d42b8bfb87cc53ce65859d55ce1876f51c
3
+ size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3018d94a8b9b3b95a3578032d80b8d3f31c01fab9a615c48039128422aba13ef
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:423c49ed521f6986d20d8b29112b383f4b0f3f2e228084ef82c2ad7dcd5d1de8
3
+ size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,244 +1,96 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 100.0,
5
  "eval_steps": 200,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.3333333333333333,
13
- "eval_loss": 10.375551223754883,
14
- "eval_runtime": 1.0009,
15
- "eval_samples_per_second": 1499.719,
16
- "eval_steps_per_second": 5.995,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 3.3333333333333335,
21
- "grad_norm": 0.1015625,
22
- "learning_rate": 0.00019945218953682734,
23
- "loss": 10.3718,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 6.666666666666667,
28
- "grad_norm": 0.162109375,
29
- "learning_rate": 0.00019781476007338058,
30
- "loss": 10.3553,
31
  "step": 20
32
  },
33
  {
34
- "epoch": 10.0,
35
- "grad_norm": 0.291015625,
36
- "learning_rate": 0.00019510565162951537,
37
- "loss": 10.3251,
38
  "step": 30
39
  },
40
  {
41
- "epoch": 13.333333333333334,
42
- "grad_norm": 0.330078125,
43
- "learning_rate": 0.0001913545457642601,
44
- "loss": 10.2723,
45
  "step": 40
46
  },
47
  {
48
- "epoch": 16.666666666666668,
49
- "grad_norm": 0.328125,
50
- "learning_rate": 0.00018660254037844388,
51
- "loss": 10.2096,
52
  "step": 50
53
  },
54
  {
55
- "epoch": 20.0,
56
- "grad_norm": 0.328125,
57
- "learning_rate": 0.00018090169943749476,
58
- "loss": 10.1499,
59
  "step": 60
60
  },
61
  {
62
- "epoch": 23.333333333333332,
63
  "grad_norm": 0.326171875,
64
- "learning_rate": 0.00017431448254773944,
65
- "loss": 10.0935,
66
  "step": 70
67
  },
68
  {
69
- "epoch": 26.666666666666668,
70
- "grad_norm": 0.333984375,
71
- "learning_rate": 0.00016691306063588583,
72
- "loss": 10.0398,
73
  "step": 80
74
  },
75
  {
76
- "epoch": 30.0,
77
- "grad_norm": 0.337890625,
78
- "learning_rate": 0.00015877852522924732,
79
- "loss": 9.9895,
80
  "step": 90
81
  },
82
  {
83
- "epoch": 33.333333333333336,
84
- "grad_norm": 0.341796875,
85
- "learning_rate": 0.00015000000000000001,
86
- "loss": 9.9424,
87
- "step": 100
88
- },
89
- {
90
- "epoch": 36.666666666666664,
91
- "grad_norm": 0.34375,
92
- "learning_rate": 0.00014067366430758004,
93
- "loss": 9.8995,
94
- "step": 110
95
- },
96
- {
97
- "epoch": 40.0,
98
- "grad_norm": 0.345703125,
99
- "learning_rate": 0.00013090169943749476,
100
- "loss": 9.859,
101
- "step": 120
102
- },
103
- {
104
- "epoch": 43.333333333333336,
105
- "grad_norm": 0.34765625,
106
- "learning_rate": 0.00012079116908177593,
107
- "loss": 9.8216,
108
- "step": 130
109
- },
110
- {
111
- "epoch": 46.666666666666664,
112
- "grad_norm": 0.3515625,
113
- "learning_rate": 0.00011045284632676536,
114
- "loss": 9.7872,
115
- "step": 140
116
- },
117
- {
118
- "epoch": 50.0,
119
- "grad_norm": 0.357421875,
120
- "learning_rate": 0.0001,
121
- "loss": 9.7569,
122
- "step": 150
123
- },
124
- {
125
- "epoch": 53.333333333333336,
126
- "grad_norm": 0.35546875,
127
- "learning_rate": 8.954715367323468e-05,
128
- "loss": 9.7325,
129
- "step": 160
130
- },
131
- {
132
- "epoch": 56.666666666666664,
133
- "grad_norm": 0.359375,
134
- "learning_rate": 7.920883091822408e-05,
135
- "loss": 9.712,
136
- "step": 170
137
- },
138
- {
139
- "epoch": 60.0,
140
- "grad_norm": 0.359375,
141
- "learning_rate": 6.909830056250527e-05,
142
- "loss": 9.697,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 63.333333333333336,
147
- "grad_norm": 0.361328125,
148
- "learning_rate": 5.9326335692419995e-05,
149
- "loss": 9.6841,
150
- "step": 190
151
- },
152
- {
153
- "epoch": 66.66666666666667,
154
- "grad_norm": 0.361328125,
155
- "learning_rate": 5.000000000000002e-05,
156
- "loss": 9.6746,
157
- "step": 200
158
- },
159
- {
160
- "epoch": 66.66666666666667,
161
- "eval_loss": 9.681697845458984,
162
- "eval_runtime": 0.9613,
163
- "eval_samples_per_second": 1561.466,
164
- "eval_steps_per_second": 6.242,
165
- "step": 200
166
- },
167
- {
168
- "epoch": 70.0,
169
- "grad_norm": 0.36328125,
170
- "learning_rate": 4.12214747707527e-05,
171
- "loss": 9.6678,
172
- "step": 210
173
- },
174
- {
175
- "epoch": 73.33333333333333,
176
- "grad_norm": 0.36328125,
177
- "learning_rate": 3.308693936411421e-05,
178
- "loss": 9.6641,
179
- "step": 220
180
- },
181
- {
182
- "epoch": 76.66666666666667,
183
- "grad_norm": 0.36328125,
184
- "learning_rate": 2.5685517452260567e-05,
185
- "loss": 9.6616,
186
- "step": 230
187
- },
188
- {
189
- "epoch": 80.0,
190
- "grad_norm": 0.36328125,
191
- "learning_rate": 1.9098300562505266e-05,
192
- "loss": 9.6605,
193
- "step": 240
194
- },
195
- {
196
- "epoch": 83.33333333333333,
197
- "grad_norm": 0.365234375,
198
- "learning_rate": 1.339745962155613e-05,
199
- "loss": 9.6596,
200
- "step": 250
201
- },
202
- {
203
- "epoch": 86.66666666666667,
204
- "grad_norm": 0.36328125,
205
- "learning_rate": 8.645454235739903e-06,
206
- "loss": 9.6597,
207
- "step": 260
208
- },
209
- {
210
- "epoch": 90.0,
211
- "grad_norm": 0.36328125,
212
- "learning_rate": 4.8943483704846475e-06,
213
- "loss": 9.6595,
214
- "step": 270
215
- },
216
- {
217
- "epoch": 93.33333333333333,
218
- "grad_norm": 0.361328125,
219
- "learning_rate": 2.1852399266194314e-06,
220
- "loss": 9.6595,
221
- "step": 280
222
- },
223
- {
224
- "epoch": 96.66666666666667,
225
- "grad_norm": 0.3671875,
226
- "learning_rate": 5.478104631726711e-07,
227
- "loss": 9.659,
228
- "step": 290
229
- },
230
- {
231
- "epoch": 100.0,
232
- "grad_norm": 0.36328125,
233
  "learning_rate": 0.0,
234
- "loss": 9.6596,
235
- "step": 300
236
  }
237
  ],
238
  "logging_steps": 10,
239
- "max_steps": 300,
240
  "num_input_tokens_seen": 0,
241
- "num_train_epochs": 100,
242
  "save_steps": 200,
243
  "stateful_callbacks": {
244
  "TrainerControl": {
@@ -252,7 +104,7 @@
252
  "attributes": {}
253
  }
254
  },
255
- "total_flos": 490990259404800.0,
256
  "train_batch_size": 32,
257
  "trial_name": null,
258
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.666666666666668,
5
  "eval_steps": 200,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.16666666666666666,
13
+ "eval_loss": 10.376375198364258,
14
+ "eval_runtime": 2.3455,
15
+ "eval_samples_per_second": 639.941,
16
+ "eval_steps_per_second": 5.116,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 1.6666666666666665,
21
+ "grad_norm": 0.09375,
22
+ "learning_rate": 0.00019863613034027224,
23
+ "loss": 10.3756,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 3.3333333333333335,
28
+ "grad_norm": 0.12353515625,
29
+ "learning_rate": 0.0001879473751206489,
30
+ "loss": 10.3632,
31
  "step": 20
32
  },
33
  {
34
+ "epoch": 5.0,
35
+ "grad_norm": 0.2236328125,
36
+ "learning_rate": 0.00016772815716257412,
37
+ "loss": 10.3433,
38
  "step": 30
39
  },
40
  {
41
+ "epoch": 6.666666666666667,
42
+ "grad_norm": 0.3203125,
43
+ "learning_rate": 0.00014016954246529696,
44
+ "loss": 10.3073,
45
  "step": 40
46
  },
47
  {
48
+ "epoch": 8.333333333333334,
49
+ "grad_norm": 0.330078125,
50
+ "learning_rate": 0.00010825793454723325,
51
+ "loss": 10.2602,
52
  "step": 50
53
  },
54
  {
55
+ "epoch": 10.0,
56
+ "grad_norm": 0.326171875,
57
+ "learning_rate": 7.54514512859201e-05,
58
+ "loss": 10.2203,
59
  "step": 60
60
  },
61
  {
62
+ "epoch": 11.666666666666666,
63
  "grad_norm": 0.326171875,
64
+ "learning_rate": 4.530518418775733e-05,
65
+ "loss": 10.1945,
66
  "step": 70
67
  },
68
  {
69
+ "epoch": 13.333333333333334,
70
+ "grad_norm": 0.328125,
71
+ "learning_rate": 2.1085949060360654e-05,
72
+ "loss": 10.1812,
73
  "step": 80
74
  },
75
  {
76
+ "epoch": 15.0,
77
+ "grad_norm": 0.328125,
78
+ "learning_rate": 5.418275829936537e-06,
79
+ "loss": 10.1773,
80
  "step": 90
81
  },
82
  {
83
+ "epoch": 16.666666666666668,
84
+ "grad_norm": 0.328125,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  "learning_rate": 0.0,
86
+ "loss": 10.1767,
87
+ "step": 100
88
  }
89
  ],
90
  "logging_steps": 10,
91
+ "max_steps": 100,
92
  "num_input_tokens_seen": 0,
93
+ "num_train_epochs": 17,
94
  "save_steps": 200,
95
  "stateful_callbacks": {
96
  "TrainerControl": {
 
104
  "attributes": {}
105
  }
106
  },
107
+ "total_flos": 81831709900800.0,
108
  "train_batch_size": 32,
109
  "trial_name": null,
110
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9629b97ec393ef093e4840cb93f3f7c6eafc6cfa1b3cb229133bf54718ca98cb
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cf48786efac37806223cc8882d4253d84ff8c3599dc92c9fda0e12bc8a651f
3
  size 6840