SystemAdmin123 committed on
Commit
2b4ff00
·
verified ·
1 Parent(s): 1f9d8d8

Training in progress, step 280, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57aa943ba31b7fd0f2d2258b3638435908884af1e83b7e3da9763ba67b95fa40
3
  size 136062744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b02f3bea30ba5f4000f2757ed1fa28858b738010261b54d239d104557caaaf1
3
  size 136062744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78f609766fd9499e6af357bcd74eef24836222dee0149f07897fd8895e50aade
3
- size 272133748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf4819b116e20e0cf6edb7b9a514d0c38a1681ed9fb664e8b1387fb6e27e99ca
3
+ size 272133812
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e80a2275824ab49f6bc0b217bb315cd0a85d3c25b43a245828495794a78d4d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a021bf8fc1bd19adcc1376dcee299d8a04aa1a7952251bad1e317521748875b7
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:381eb7d8287a93e17a40cc15be93d534da9dbf37378fcc74868d5615daf19b34
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972bd8028f20fce3573923f9a005027e0d260b48904a7835117b203b223afc00
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d4d987ee650d278db90b1b49f5d5e57d81bba91b4e110659d4027a225f63078
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:346eed5082d2ec3cd3b01057d77087e12608217fe1db4e2cc48a1c635d2b350f
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bfd04834fa55090f0aa6f19062eb69d5e7e7d567f3b51b2a09c93679da782f7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8617efc3472b5b019323214ba6f5ff8db4e44b4f8dae20eed03655cbbbdeade
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e15a8fd81fd90d6fe35aa6feb35c5e13dd4fe18af2950ff7fcf4c6b68016d32
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57d29062e2b005ef9338306d164f8255bdbae7cde6979cc3c6601ddda4f3ab8
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4d603165c2d1acc537a09a3e1f8f3831fbd36a555d1b4282034bf9a666af8e7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7294aa3359669dd9326bd90c9c9925cb82746ef54c24e800e87f0555b79b28
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3b4cdeeb7d7c2d37111aeb034296baee0b0b647a48bc49f1ac03a01bf25b677
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712c430310813615c3ea651f2526c9d2f01c1c820ddb6bf6771a7983456fddf7
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:887f0c6920abc07c1199dc922f55301b3e567adfbbf72707fdf5afb2c202b331
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b34f186dd0d24d76f54563508cd3f031ee1155efb43c5a5a9cfcc9ce2e166bce
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:941d9e9f4cfb6894bf574771af69c852f299b452dcf03e677dae3dadf692a003
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f71fff5908e20a1b3137a87f6a31d497f79c8c2b801a7eea008ae86f7863417
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.530805687203792,
5
  "eval_steps": 200,
6
- "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -191,6 +191,34 @@
191
  "learning_rate": 0.0001977240649801253,
192
  "loss": 2.055,
193
  "step": 240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  }
195
  ],
196
  "logging_steps": 10,
@@ -210,7 +238,7 @@
210
  "attributes": {}
211
  }
212
  },
213
- "total_flos": 3.267013366723379e+16,
214
  "train_batch_size": 1,
215
  "trial_name": null,
216
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.2843601895734595,
5
  "eval_steps": 200,
6
+ "global_step": 280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
191
  "learning_rate": 0.0001977240649801253,
192
  "loss": 2.055,
193
  "step": 240
194
+ },
195
+ {
196
+ "epoch": 4.720379146919432,
197
+ "grad_norm": 1.921875,
198
+ "learning_rate": 0.00019744105246469263,
199
+ "loss": 1.857,
200
+ "step": 250
201
+ },
202
+ {
203
+ "epoch": 4.909952606635071,
204
+ "grad_norm": 2.0,
205
+ "learning_rate": 0.00019714168639517544,
206
+ "loss": 2.0555,
207
+ "step": 260
208
+ },
209
+ {
210
+ "epoch": 5.0947867298578196,
211
+ "grad_norm": 2.6875,
212
+ "learning_rate": 0.0001968260170142496,
213
+ "loss": 1.7808,
214
+ "step": 270
215
+ },
216
+ {
217
+ "epoch": 5.2843601895734595,
218
+ "grad_norm": 1.75,
219
+ "learning_rate": 0.00019649409730077935,
220
+ "loss": 1.7822,
221
+ "step": 280
222
  }
223
  ],
224
  "logging_steps": 10,
 
238
  "attributes": {}
239
  }
240
  },
241
+ "total_flos": 3.810376470757376e+16,
242
  "train_batch_size": 1,
243
  "trial_name": null,
244
  "trial_params": null