Training in progress, step 9800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1217,6 +1217,10 @@ You can finetune this model on your own dataset.
 | 0.1679 | 9500 | 0.3932        |
 | 0.1688 | 9550 | 0.4836        |
 | 0.1696 | 9600 | 0.3989        |
 ### Framework Versions

 | 0.1679 | 9500 | 0.3932        |
 | 0.1688 | 9550 | 0.4836        |
 | 0.1696 | 9600 | 0.3989        |
+| 0.1705 | 9650 | 0.4025        |
+| 0.1714 | 9700 | 0.467         |
+| 0.1723 | 9750 | 0.3558        |
+| 0.1732 | 9800 | 0.3623        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d75f2ae04decbc38e9837063052796e9f2bb5b93ba4b3967ba78efa578f524f
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb531b99dca162667fb726e612e94c051018f779f5ff509bebfc939722f85886
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d7bd92a04e9045a8d7bd34505c04408ff4f945e28a7e521fda8627bb4a5ffb8
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:4671b0ce660e002e126649d2065bbd8f4633842c0c71f60f512ccc9dba9aba21
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4991e7b45094aac9ff0a60591e26561675ae2318a690a5d07c69cdb47d8654a9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:95f0e0a231d2de9fe39fce57e75a586db12e27f102a687e2901f534fc55060b6
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:498b0c6fbe9530a64ad6460f8bb9ba5c88d4dacacfc84f82ced2672249165959
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:99e39c7d3d290833603a74523e3d4fc84b4f0f3056845fa8a3e71170347650dd
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b09b7aeb1f1c748a56856918139834bbfcfbfa3a7f47fdb7435e88f5b14f5a1d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a105c4972c9f577b54a9a86ea803b0dfd0803e1dbb9c812134e5869ca00a97d7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.16963828170557155,
   "eval_steps": 500,
-  "global_step": 9600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1352,6 +1352,34 @@
       "learning_rate": 4.61330034752901e-05,
       "loss": 0.3989,
       "step": 9600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.17317241257443763,
   "eval_steps": 500,
+  "global_step": 9800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.61330034752901e-05,
       "loss": 0.3989,
       "step": 9600
+    },
+    {
+      "epoch": 0.17052181442278808,
+      "grad_norm": 1.5831292867660522,
+      "learning_rate": 4.608391745695156e-05,
+      "loss": 0.4025,
+      "step": 9650
+    },
+    {
+      "epoch": 0.1714053471400046,
+      "grad_norm": 5.1861371994018555,
+      "learning_rate": 4.6034831438613027e-05,
+      "loss": 0.467,
+      "step": 9700
+    },
+    {
+      "epoch": 0.1722888798572211,
+      "grad_norm": 3.7466721534729004,
+      "learning_rate": 4.598574542027449e-05,
+      "loss": 0.3558,
+      "step": 9750
+    },
+    {
+      "epoch": 0.17317241257443763,
+      "grad_norm": 2.143721342086792,
+      "learning_rate": 4.5936659401935956e-05,
+      "loss": 0.3623,
+      "step": 9800
     }
   ],
   "logging_steps": 50,