Training in progress, step 20, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/model-00001-of-00004.safetensors +1 -1
last-checkpoint/model-00002-of-00004.safetensors +1 -1
last-checkpoint/model-00003-of-00004.safetensors +1 -1
last-checkpoint/model-00004-of-00004.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +18 -144
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76e466c580fe14588089dcc7d286c83cd652f7c0a24eccc5457f63a2c1a6744b
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:44ab72b4709d6ffbeb35af35f341004d363c90fd1eda36628e6d84de407c80bf
 size 4976698672

last-checkpoint/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46673c0f6373d0edea145cf1c76d151e7e4002b0f392305989819179f47d88d2
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ab6f792622c0c35fd0799cc8a9d4255063a06471fff7aa9c635e67635f7286f
 size 4999802720

last-checkpoint/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2181f67f278f731239c7b02e50926de87b1f74983952474350708269c5dd6a24
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:567bbc43b31b395b0d27e12f1dc5930e0dafe7f7b99b4e60685919b90587ae4c
 size 4915916176

last-checkpoint/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a31a8df2e382dbfa72c7ca55ade38083f4b79558c330f104b4e7917d67b1c3e
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:75e3a8fbbb21272764f003e8b4f7a98ed04d0de1487f3c39a257ca07988b0161
 size 1168138808

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3233a68936feec1fb7b28adf511d152994f3fd4a66f95f806271d01bbb201c2
 size 16311821124

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b5fa7e25560e0e69c7447925bbecf06781cddc7ad3a142309b7ea1097dbfd80
 size 16311821124

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69a04a1208f7a0d6f51f37a136b5c2e55bf3f53b3d0fd57164c5b83ca47a2645
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f60241cb6cb86cf9966e8cfe2248be00bce643b5808e2c3b78c9cb618eea253
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:080a7e72d6be938a9418e60003db90412af8a61e6434f9e9f1b598cca861dbcd
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e76feebe960d60536ad1ed0bcaee2e12a3f8432f33b7ee3b0cae559b12130c0
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3d114a75d37be476b865187eb2b3d29d9343b131614a08f42be0014f110ce6f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f5988c600823ef881ed3900c9909420e69870efab70abf3dca0673a3c88b057
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fc5a0f78838743362c5d5378dff81ea2f7d0039da53a423f1759e861bc6b233
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7090a3c6759641db81e3ee589636615551bb1b7ce0948f2fd4ab7d7beb35de9c
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86aa1c590799d718328ad7b7198db3fa4678198705c85eb25b7f257d9e38e2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:86a562280f18454416a11d2a32a1435679f44d10b11637c5297815c06d499163
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.127659574468085,
-  "eval_steps": 200,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11,165 +11,39 @@
     {
       "epoch": 0.010638297872340425,
       "eval_loss": 2.7702033519744873,
-      "eval_runtime": 30.6604,
-      "eval_samples_per_second": 48.956,
-      "eval_steps_per_second": 6.132,
       "step": 1
     },
     {
       "epoch": 0.10638297872340426,
-      "grad_norm": 5.40625,
       "learning_rate": 8e-05,
-      "loss": 2.0559,
       "step": 10
     },
     {
       "epoch": 0.2127659574468085,
-      "grad_norm": 5.25,
       "learning_rate": 0.00016,
-      "loss": 2.0743,
       "step": 20
     },
     {
-      "epoch": 0.3191489361702128,
-      "grad_norm": 5.75,
-      "learning_rate": 0.00019994532573409262,
-      "loss": 2.4587,
-      "step": 30
-    },
-    {
-      "epoch": 0.425531914893617,
-      "grad_norm": 6.03125,
-      "learning_rate": 0.00019950829025450114,
-      "loss": 2.6661,
-      "step": 40
-    },
-    {
-      "epoch": 0.5319148936170213,
-      "grad_norm": 6.75,
-      "learning_rate": 0.00019863613034027224,
-      "loss": 2.8731,
-      "step": 50
-    },
-    {
-      "epoch": 0.6382978723404256,
-      "grad_norm": 4.0625,
-      "learning_rate": 0.0001973326597248006,
-      "loss": 2.8111,
-      "step": 60
-    },
-    {
-      "epoch": 0.7446808510638298,
-      "grad_norm": 5.59375,
-      "learning_rate": 0.00019560357815343577,
-      "loss": 2.8978,
-      "step": 70
-    },
-    {
-      "epoch": 0.851063829787234,
-      "grad_norm": 7.09375,
-      "learning_rate": 0.0001934564464599461,
-      "loss": 2.9503,
-      "step": 80
-    },
-    {
-      "epoch": 0.9574468085106383,
-      "grad_norm": 10.125,
-      "learning_rate": 0.00019090065350491626,
-      "loss": 2.9875,
-      "step": 90
-    },
-    {
-      "epoch": 1.0638297872340425,
-      "grad_norm": 5.34375,
-      "learning_rate": 0.0001879473751206489,
-      "loss": 2.1433,
-      "step": 100
-    },
-    {
-      "epoch": 1.1702127659574468,
-      "grad_norm": 5.1875,
-      "learning_rate": 0.00018460952524209355,
-      "loss": 1.4928,
-      "step": 110
-    },
-    {
-      "epoch": 1.2765957446808511,
-      "grad_norm": 4.90625,
-      "learning_rate": 0.00018090169943749476,
-      "loss": 1.6588,
-      "step": 120
-    },
-    {
-      "epoch": 1.3829787234042552,
-      "grad_norm": 4.59375,
-      "learning_rate": 0.00017684011108568592,
-      "loss": 1.5762,
-      "step": 130
-    },
-    {
-      "epoch": 1.4893617021276595,
-      "grad_norm": 4.46875,
-      "learning_rate": 0.00017244252047910892,
-      "loss": 1.6862,
-      "step": 140
-    },
-    {
-      "epoch": 1.5957446808510638,
-      "grad_norm": 4.59375,
-      "learning_rate": 0.00016772815716257412,
-      "loss": 1.6834,
-      "step": 150
-    },
-    {
-      "epoch": 1.702127659574468,
-      "grad_norm": 4.46875,
-      "learning_rate": 0.0001627176358473537,
-      "loss": 1.6762,
-      "step": 160
-    },
-    {
-      "epoch": 1.8085106382978724,
-      "grad_norm": 4.5625,
-      "learning_rate": 0.00015743286626829437,
-      "loss": 1.7259,
-      "step": 170
-    },
-    {
-      "epoch": 1.9148936170212765,
-      "grad_norm": 4.84375,
-      "learning_rate": 0.00015189695737812152,
-      "loss": 1.8411,
-      "step": 180
-    },
-    {
-      "epoch": 2.021276595744681,
-      "grad_norm": 3.53125,
-      "learning_rate": 0.0001461341162978688,
-      "loss": 1.5208,
-      "step": 190
-    },
-    {
-      "epoch": 2.127659574468085,
-      "grad_norm": 2.75,
-      "learning_rate": 0.00014016954246529696,
-      "loss": 0.6512,
-      "step": 200
-    },
-    {
-      "epoch": 2.127659574468085,
-      "eval_loss": 3.396796941757202,
-      "eval_runtime": 28.9515,
-      "eval_samples_per_second": 51.845,
-      "eval_steps_per_second": 6.494,
-      "step": 200
     }
   ],
   "logging_steps": 10,
   "max_steps": 500,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 6,
-  "save_steps": 200,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -182,7 +56,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4755282835996672e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2127659574468085,
+  "eval_steps": 20,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     {
       "epoch": 0.010638297872340425,
       "eval_loss": 2.7702033519744873,
+      "eval_runtime": 32.7713,
+      "eval_samples_per_second": 45.802,
+      "eval_steps_per_second": 5.737,
       "step": 1
     },
     {
       "epoch": 0.10638297872340426,
+      "grad_norm": 5.4375,
       "learning_rate": 8e-05,
+      "loss": 2.0564,
       "step": 10
     },
     {
       "epoch": 0.2127659574468085,
+      "grad_norm": 5.375,
       "learning_rate": 0.00016,
+      "loss": 2.073,
       "step": 20
     },
     {
+      "epoch": 0.2127659574468085,
+      "eval_loss": 2.1077733039855957,
+      "eval_runtime": 29.718,
+      "eval_samples_per_second": 50.508,
+      "eval_steps_per_second": 6.326,
+      "step": 20
     }
   ],
   "logging_steps": 10,
   "max_steps": 500,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 6,
+  "save_steps": 20,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
       "attributes": {}
     }
   },
+  "total_flos": 1.4755282835996672e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8dd3a0af1f706fbf33a681f8d5ee9dd18fc80aa1558af309107d63117c714c75
 size 7032

 version https://git-lfs.github.com/spec/v1
+oid sha256:46c7728cf6d1151c0535e539428181ba72626c4c7cb50ebf1d0cd8c8ffd1ed8d
 size 7032