Training in progress, step 10500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eb8bf06dd538fcb262c2fed2f9e68d7952360b525571db7ca0f1430e447d9ed
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:013308b4dc1251389723706bb70a4c12dd3e0f1c0451dc06722fb7fff47c38dc
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:942c2875abedaddb5c9436b198254369aa4f7b28a3b4b68fd6fcf41053e028fd
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:580eafdf89a0de8a5bf71611b2d46376bc396d69d739d11c02cf0fa1c01e5d26
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a7ea66d1531ee601a7eaa0403a8186669a6c59db91f4cad349d74bc0115c72f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b375d24b4f51731ac4974850b0a2bf3cfbc153b3c9a53e800d669342a8ff2a30
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7631547026bec9cd7a6ea58b5f8fb2fb117b688cae951965f0f9ff628a1476de
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:62b81d85aecb38c3a9a2c050795de755f75a43f89e29b6a5cc7c6ab514e2f67e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08249519765377045,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-10000",
-  "epoch": 1.6,
   "eval_steps": 500,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1567,6 +1567,84 @@
       "eval_samples_per_second": 17.101,
       "eval_steps_per_second": 2.138,
       "step": 10000
     }
   ],
   "logging_steps": 50,
@@ -1586,7 +1664,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.87166312448e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.08225961029529572,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-10500",
+  "epoch": 1.6800000000000002,
   "eval_steps": 500,
+  "global_step": 10500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.101,
       "eval_steps_per_second": 2.138,
       "step": 10000
+    },
+    {
+      "epoch": 1.608,
+      "grad_norm": 5240.361328125,
+      "learning_rate": 1.392e-05,
+      "loss": 0.0546,
+      "step": 10050
+    },
+    {
+      "epoch": 1.616,
+      "grad_norm": 7000.00927734375,
+      "learning_rate": 1.384e-05,
+      "loss": 0.0535,
+      "step": 10100
+    },
+    {
+      "epoch": 1.624,
+      "grad_norm": 8141.75048828125,
+      "learning_rate": 1.376e-05,
+      "loss": 0.0555,
+      "step": 10150
+    },
+    {
+      "epoch": 1.6320000000000001,
+      "grad_norm": 6566.3662109375,
+      "learning_rate": 1.3680000000000001e-05,
+      "loss": 0.0518,
+      "step": 10200
+    },
+    {
+      "epoch": 1.6400000000000001,
+      "grad_norm": 7028.8935546875,
+      "learning_rate": 1.36e-05,
+      "loss": 0.0572,
+      "step": 10250
+    },
+    {
+      "epoch": 1.6480000000000001,
+      "grad_norm": 13007.5703125,
+      "learning_rate": 1.352e-05,
+      "loss": 0.0567,
+      "step": 10300
+    },
+    {
+      "epoch": 1.6560000000000001,
+      "grad_norm": 6286.06640625,
+      "learning_rate": 1.344e-05,
+      "loss": 0.0529,
+      "step": 10350
+    },
+    {
+      "epoch": 1.6640000000000001,
+      "grad_norm": 6360.68408203125,
+      "learning_rate": 1.336e-05,
+      "loss": 0.054,
+      "step": 10400
+    },
+    {
+      "epoch": 1.6720000000000002,
+      "grad_norm": 8098.84228515625,
+      "learning_rate": 1.328e-05,
+      "loss": 0.0592,
+      "step": 10450
+    },
+    {
+      "epoch": 1.6800000000000002,
+      "grad_norm": 6886.65283203125,
+      "learning_rate": 1.32e-05,
+      "loss": 0.0524,
+      "step": 10500
+    },
+    {
+      "epoch": 1.6800000000000002,
+      "eval_loss": 0.08225961029529572,
+      "eval_runtime": 116.8647,
+      "eval_samples_per_second": 17.114,
+      "eval_steps_per_second": 2.139,
+      "step": 10500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 5.115246280704e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null