Training in progress, step 400, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +152 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54199da57b837d4891196f02c89063f1b4c8051da494ceb732eca28e0e049899
 size 2200119864

 version https://git-lfs.github.com/spec/v1
+oid sha256:d49c67c9a9666f30da75813d84d4f345d605af04f3db06de5d25afb0ea920121
 size 2200119864

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afb6588ffa25521011a1286f0128a2f605dab5da48172dece4259b51767d871d
-size 2235234810

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffa9854a24c3214762143f01506e34bdb6a7f70c7cd9c9204e36de47b73b262d
+size 2235235002

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f2762d792d815ec78151cdfb1183ff8fe6b1a4c5fcc050ac87b15dc66050802
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a5bab486e54fec167df9035ee491641ac4693e6fee5143f139d8f5e78d33bb4
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbc1f875219f8ac3c776408d86b647fbc8462ad7ecb23ac468a078c2c50ec46e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2eca9038103aafcb78807df6c0d9a976f86161b975a7d39850ba8b30af810dc6
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbcdba7bc63bee2e58552ca3df8d3d9521da29d066231bf49913479cc60a05a4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:88bd6fc8a4cf304bf8ccc7371cacf9e2e0ff157c5b5651b58a181d0a1c5c46b1
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac162cf8f57eb46f3e3f12f50328cd5574b5384b6fe27493b80f0979cc8d636b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:75dad65b2c4702dc22bd40914a23f91917390cf5c9bf829ecd6acffcfa98c50c
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abcd078b1186023c3032f4652f953246e76b5f62233ca3f894e881ea9feb17b7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:78c268aaacedf0424c20d98a7500a3ae5948e221bb4ac7fd103a57d4b9664fd8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 11.764705882352942,
   "eval_steps": 200,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -163,6 +163,154 @@
       "eval_samples_per_second": 247.958,
       "eval_steps_per_second": 5.286,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -177,12 +325,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.2203604110737408e+17,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 23.529411764705884,
   "eval_steps": 200,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 247.958,
       "eval_steps_per_second": 5.286,
       "step": 200
+    },
+    {
+      "epoch": 12.352941176470589,
+      "grad_norm": 0.03076171875,
+      "learning_rate": 0.0001,
+      "loss": 0.0022,
+      "step": 210
+    },
+    {
+      "epoch": 12.941176470588236,
+      "grad_norm": 0.026123046875,
+      "learning_rate": 9.174206545276677e-05,
+      "loss": 0.0021,
+      "step": 220
+    },
+    {
+      "epoch": 13.529411764705882,
+      "grad_norm": 0.0230712890625,
+      "learning_rate": 8.35405409719266e-05,
+      "loss": 0.002,
+      "step": 230
+    },
+    {
+      "epoch": 14.117647058823529,
+      "grad_norm": 0.0223388671875,
+      "learning_rate": 7.54514512859201e-05,
+      "loss": 0.0021,
+      "step": 240
+    },
+    {
+      "epoch": 14.705882352941176,
+      "grad_norm": 0.0225830078125,
+      "learning_rate": 6.753005307953167e-05,
+      "loss": 0.002,
+      "step": 250
+    },
+    {
+      "epoch": 15.294117647058824,
+      "grad_norm": 0.0191650390625,
+      "learning_rate": 5.983045753470308e-05,
+      "loss": 0.002,
+      "step": 260
+    },
+    {
+      "epoch": 15.882352941176471,
+      "grad_norm": 0.0191650390625,
+      "learning_rate": 5.240526069629265e-05,
+      "loss": 0.002,
+      "step": 270
+    },
+    {
+      "epoch": 16.470588235294116,
+      "grad_norm": 0.01953125,
+      "learning_rate": 4.530518418775733e-05,
+      "loss": 0.0019,
+      "step": 280
+    },
+    {
+      "epoch": 17.058823529411764,
+      "grad_norm": 0.0213623046875,
+      "learning_rate": 3.857872873103322e-05,
+      "loss": 0.002,
+      "step": 290
+    },
+    {
+      "epoch": 17.647058823529413,
+      "grad_norm": 0.0230712890625,
+      "learning_rate": 3.227184283742591e-05,
+      "loss": 0.0019,
+      "step": 300
+    },
+    {
+      "epoch": 18.235294117647058,
+      "grad_norm": 0.0208740234375,
+      "learning_rate": 2.6427608932686843e-05,
+      "loss": 0.0019,
+      "step": 310
+    },
+    {
+      "epoch": 18.823529411764707,
+      "grad_norm": 0.0228271484375,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 0.0019,
+      "step": 320
+    },
+    {
+      "epoch": 19.41176470588235,
+      "grad_norm": 0.02587890625,
+      "learning_rate": 1.6283352173747145e-05,
+      "loss": 0.002,
+      "step": 330
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.021484375,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 0.0019,
+      "step": 340
+    },
+    {
+      "epoch": 20.58823529411765,
+      "grad_norm": 0.0208740234375,
+      "learning_rate": 8.422667334494249e-06,
+      "loss": 0.002,
+      "step": 350
+    },
+    {
+      "epoch": 21.176470588235293,
+      "grad_norm": 0.02294921875,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 0.0019,
+      "step": 360
+    },
+    {
+      "epoch": 21.764705882352942,
+      "grad_norm": 0.020751953125,
+      "learning_rate": 3.059973406066963e-06,
+      "loss": 0.002,
+      "step": 370
+    },
+    {
+      "epoch": 22.352941176470587,
+      "grad_norm": 0.0203857421875,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 0.0019,
+      "step": 380
+    },
+    {
+      "epoch": 22.941176470588236,
+      "grad_norm": 0.0240478515625,
+      "learning_rate": 3.415506993330153e-07,
+      "loss": 0.0019,
+      "step": 390
+    },
+    {
+      "epoch": 23.529411764705884,
+      "grad_norm": 0.0218505859375,
+      "learning_rate": 0.0,
+      "loss": 0.0019,
+      "step": 400
+    },
+    {
+      "epoch": 23.529411764705884,
+      "eval_loss": 3.6231682300567627,
+      "eval_runtime": 6.0013,
+      "eval_samples_per_second": 250.112,
+      "eval_steps_per_second": 5.332,
+      "step": 400
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.4407208221474816e+17,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null