Training in progress, step 280, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57aa943ba31b7fd0f2d2258b3638435908884af1e83b7e3da9763ba67b95fa40
 size 136062744

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b02f3bea30ba5f4000f2757ed1fa28858b738010261b54d239d104557caaaf1
 size 136062744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78f609766fd9499e6af357bcd74eef24836222dee0149f07897fd8895e50aade
-size 272133748

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf4819b116e20e0cf6edb7b9a514d0c38a1681ed9fb664e8b1387fb6e27e99ca
+size 272133812

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93e80a2275824ab49f6bc0b217bb315cd0a85d3c25b43a245828495794a78d4d
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a021bf8fc1bd19adcc1376dcee299d8a04aa1a7952251bad1e317521748875b7
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:381eb7d8287a93e17a40cc15be93d534da9dbf37378fcc74868d5615daf19b34
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:972bd8028f20fce3573923f9a005027e0d260b48904a7835117b203b223afc00
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d4d987ee650d278db90b1b49f5d5e57d81bba91b4e110659d4027a225f63078
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:346eed5082d2ec3cd3b01057d77087e12608217fe1db4e2cc48a1c635d2b350f
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bfd04834fa55090f0aa6f19062eb69d5e7e7d567f3b51b2a09c93679da782f7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8617efc3472b5b019323214ba6f5ff8db4e44b4f8dae20eed03655cbbbdeade
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e15a8fd81fd90d6fe35aa6feb35c5e13dd4fe18af2950ff7fcf4c6b68016d32
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b57d29062e2b005ef9338306d164f8255bdbae7cde6979cc3c6601ddda4f3ab8
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4d603165c2d1acc537a09a3e1f8f3831fbd36a555d1b4282034bf9a666af8e7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a7294aa3359669dd9326bd90c9c9925cb82746ef54c24e800e87f0555b79b28
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3b4cdeeb7d7c2d37111aeb034296baee0b0b647a48bc49f1ac03a01bf25b677
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:712c430310813615c3ea651f2526c9d2f01c1c820ddb6bf6771a7983456fddf7
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:887f0c6920abc07c1199dc922f55301b3e567adfbbf72707fdf5afb2c202b331
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b34f186dd0d24d76f54563508cd3f031ee1155efb43c5a5a9cfcc9ce2e166bce
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:941d9e9f4cfb6894bf574771af69c852f299b452dcf03e677dae3dadf692a003
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f71fff5908e20a1b3137a87f6a31d497f79c8c2b801a7eea008ae86f7863417
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.530805687203792,
   "eval_steps": 200,
-  "global_step": 240,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -191,6 +191,34 @@
       "learning_rate": 0.0001977240649801253,
       "loss": 2.055,
       "step": 240
     }
   ],
   "logging_steps": 10,
@@ -210,7 +238,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.267013366723379e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.2843601895734595,
   "eval_steps": 200,
+  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001977240649801253,
       "loss": 2.055,
       "step": 240
+    },
+    {
+      "epoch": 4.720379146919432,
+      "grad_norm": 1.921875,
+      "learning_rate": 0.00019744105246469263,
+      "loss": 1.857,
+      "step": 250
+    },
+    {
+      "epoch": 4.909952606635071,
+      "grad_norm": 2.0,
+      "learning_rate": 0.00019714168639517544,
+      "loss": 2.0555,
+      "step": 260
+    },
+    {
+      "epoch": 5.0947867298578196,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0001968260170142496,
+      "loss": 1.7808,
+      "step": 270
+    },
+    {
+      "epoch": 5.2843601895734595,
+      "grad_norm": 1.75,
+      "learning_rate": 0.00019649409730077935,
+      "loss": 1.7822,
+      "step": 280
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.810376470757376e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null