Training in progress, step 40, checkpoint

Files changed (7) hide show

last-checkpoint/model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49abecc3012aa33040e2fb01e32e561bb0b599ef920d57e5804dbf05cce2f142
 size 4943178720

 version https://git-lfs.github.com/spec/v1
+oid sha256:b907ba68261a733495a2decc931e4839e4907e6e52a97528b167a0a5a0ebd392
 size 4943178720

last-checkpoint/model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e268892504057e819d6c86952d153640cfeab12dfde0c669137107e7bbaa9a7
 size 4999819336

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf95b6219ea0c58409934588c2a66dc9c8321e127f35cf06df02c3acd88afc0e
 size 4999819336

last-checkpoint/model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba955cd0f1842463cd22c00dad17603532a55cb793303e24c624588deaf5affc
 size 4540532728

 version https://git-lfs.github.com/spec/v1
+oid sha256:76053a4a789be6b616ea43ab19cf1a53cfc29041a95eebbb8627e860734e1f52
 size 4540532728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ea56f435222638b51353e98a7a59318640daf28906ee7970d8fea972ee2fd54
 size 14710155092

 version https://git-lfs.github.com/spec/v1
+oid sha256:22617f0b8389a6d4fa5a7e5e259a2a6e9ac793955c55c10b715b8d4f9596c212
 size 14710155092

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5829e55a74209d86a905d56c797173146629e2a97eb8f2567e78aab055ee715
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:572996e9dc190ff3f3a6efdf58de929f593fa2dd2f97d74fed7646828651b17f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -11,44 +11,44 @@
     {
       "epoch": 0.019230769230769232,
       "eval_loss": 1.8673125505447388,
-      "eval_runtime": 15.0658,
-      "eval_samples_per_second": 99.63,
-      "eval_steps_per_second": 6.239,
       "step": 1
     },
     {
       "epoch": 0.19230769230769232,
-      "grad_norm": 346.0,
-      "learning_rate": 0.00013333333333333334,
-      "loss": 7.3336,
       "step": 10
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 63.75,
-      "learning_rate": 0.00019984815164333163,
-      "loss": 10.1286,
       "step": 20
     },
     {
       "epoch": 0.5769230769230769,
-      "grad_norm": 1776.0,
-      "learning_rate": 0.00019863613034027224,
-      "loss": 10.0786,
       "step": 30
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 45.0,
-      "learning_rate": 0.00019622680003092503,
-      "loss": 9.0353,
       "step": 40
     }
   ],
   "logging_steps": 10,
-  "max_steps": 300,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 6,
   "save_steps": 40,
   "stateful_callbacks": {
     "TrainerControl": {

     {
       "epoch": 0.019230769230769232,
       "eval_loss": 1.8673125505447388,
+      "eval_runtime": 15.1011,
+      "eval_samples_per_second": 99.397,
+      "eval_steps_per_second": 6.225,
       "step": 1
     },
     {
       "epoch": 0.19230769230769232,
+      "grad_norm": 36.0,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 2.4526,
       "step": 10
     },
     {
       "epoch": 0.38461538461538464,
+      "grad_norm": 231.0,
+      "learning_rate": 0.00013333333333333334,
+      "loss": 2.8068,
       "step": 20
     },
     {
       "epoch": 0.5769230769230769,
+      "grad_norm": 105.0,
+      "learning_rate": 0.0002,
+      "loss": 6.5887,
       "step": 30
     },
     {
       "epoch": 0.7692307692307693,
+      "grad_norm": 67.0,
+      "learning_rate": 0.00019984815164333163,
+      "loss": 9.409,
       "step": 40
     }
   ],
   "logging_steps": 10,
+  "max_steps": 600,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 12,
   "save_steps": 40,
   "stateful_callbacks": {
     "TrainerControl": {

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa8dcd878159cc59f87447028fd2e09931ebdac776af71b4ee5edce164448ac3
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:b41a9fb463c0db3738bb35eb54493dd7cd16792db1e56184a9c9b31d58bd7b14
 size 6776