Training in progress, step 11000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:013308b4dc1251389723706bb70a4c12dd3e0f1c0451dc06722fb7fff47c38dc
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:6be50dd6f773aa1f48148cbd37fadbc53c1f82b20839270278b093c580fda84d
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:580eafdf89a0de8a5bf71611b2d46376bc396d69d739d11c02cf0fa1c01e5d26
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7a8731c7a1753c2a934fea42dc42bc1494d508ed21f32dcb78a226e403709f0
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b375d24b4f51731ac4974850b0a2bf3cfbc153b3c9a53e800d669342a8ff2a30
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad1edd41eefae5337989de90c37428566177e0258f6ab839f3f0a3e2bcd645ce
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62b81d85aecb38c3a9a2c050795de755f75a43f89e29b6a5cc7c6ab514e2f67e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:62568f1a18857cf8edd17d9d189f58f7644089636cf8dea79c190056990aaec9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08225961029529572,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-10500",
-  "epoch": 1.6800000000000002,
   "eval_steps": 500,
-  "global_step": 10500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1645,6 +1645,84 @@
       "eval_samples_per_second": 17.114,
       "eval_steps_per_second": 2.139,
       "step": 10500
     }
   ],
   "logging_steps": 50,
@@ -1664,7 +1742,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.115246280704e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.08220627158880234,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
+  "epoch": 1.76,
   "eval_steps": 500,
+  "global_step": 11000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.114,
       "eval_steps_per_second": 2.139,
       "step": 10500
+    },
+    {
+      "epoch": 1.688,
+      "grad_norm": 5443.7119140625,
+      "learning_rate": 1.3120000000000001e-05,
+      "loss": 0.0554,
+      "step": 10550
+    },
+    {
+      "epoch": 1.696,
+      "grad_norm": 6497.8408203125,
+      "learning_rate": 1.304e-05,
+      "loss": 0.057,
+      "step": 10600
+    },
+    {
+      "epoch": 1.704,
+      "grad_norm": 5618.49853515625,
+      "learning_rate": 1.296e-05,
+      "loss": 0.0498,
+      "step": 10650
+    },
+    {
+      "epoch": 1.712,
+      "grad_norm": 7447.96728515625,
+      "learning_rate": 1.288e-05,
+      "loss": 0.0568,
+      "step": 10700
+    },
+    {
+      "epoch": 1.72,
+      "grad_norm": 8283.306640625,
+      "learning_rate": 1.2800000000000001e-05,
+      "loss": 0.0566,
+      "step": 10750
+    },
+    {
+      "epoch": 1.728,
+      "grad_norm": 7497.0419921875,
+      "learning_rate": 1.272e-05,
+      "loss": 0.0502,
+      "step": 10800
+    },
+    {
+      "epoch": 1.736,
+      "grad_norm": 8445.2421875,
+      "learning_rate": 1.2640000000000001e-05,
+      "loss": 0.0562,
+      "step": 10850
+    },
+    {
+      "epoch": 1.744,
+      "grad_norm": 15980.0498046875,
+      "learning_rate": 1.2560000000000002e-05,
+      "loss": 0.0588,
+      "step": 10900
+    },
+    {
+      "epoch": 1.752,
+      "grad_norm": 5444.55615234375,
+      "learning_rate": 1.2479999999999999e-05,
+      "loss": 0.0564,
+      "step": 10950
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 7009.3037109375,
+      "learning_rate": 1.24e-05,
+      "loss": 0.0549,
+      "step": 11000
+    },
+    {
+      "epoch": 1.76,
+      "eval_loss": 0.08220627158880234,
+      "eval_runtime": 116.957,
+      "eval_samples_per_second": 17.1,
+      "eval_steps_per_second": 2.138,
+      "step": 11000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 5.358829436928e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null