Training in progress, step 500, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +7 -212
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:653c6bba71c5381d01bd043dc2c055bff5332dd7c7b2584287d7e14c2d7ee9d1
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:2141320f9b8a2cd3772fb223d1131baebd71974e2bf87b6a1c24de6bf13c8fae
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ea6c528e61d3383dbf8b7d2e9e49dde698241c4bd67ffba410e3666e0023ede
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ee2854b2510e7dccd90f9604ed994b1eb7a8d1050daf4f2c17001c85658d2b8
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d492ae092e6f5b76a2098bc809b3c1bf0702a65bf8be974795457efc1921df9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce122efa58437e9eb4167be3235ff668c46393aa7e777fe0c234b56b42a65288
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32ed8f7e313bb93709f94e944fc8cc81a5cde7370033d4c148cc7bbc922ddb47
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd4b192ffac4efaba4f3cd35aa1226766929983bc37c1c74305bd3d40b48b106
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,224 +1,19 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 14436,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.10390689941812137,
-      "grad_norm": 1.8855979442596436,
-      "learning_rate": 1.9310058187863676e-05,
-      "loss": 0.8022,
       "step": 500
-    },
-    {
-      "epoch": 0.20781379883624274,
-      "grad_norm": 0.8622527122497559,
-      "learning_rate": 1.86173455250762e-05,
-      "loss": 0.5316,
-      "step": 1000
-    },
-    {
-      "epoch": 0.3117206982543641,
-      "grad_norm": 1.404678225517273,
-      "learning_rate": 1.7924632862288724e-05,
-      "loss": 0.4832,
-      "step": 1500
-    },
-    {
-      "epoch": 0.41562759767248547,
-      "grad_norm": 1.3559819459915161,
-      "learning_rate": 1.7233305624826823e-05,
-      "loss": 0.4518,
-      "step": 2000
-    },
-    {
-      "epoch": 0.5195344970906068,
-      "grad_norm": 0.8163271546363831,
-      "learning_rate": 1.6540592962039347e-05,
-      "loss": 0.4389,
-      "step": 2500
-    },
-    {
-      "epoch": 0.6234413965087282,
-      "grad_norm": 0.8109046816825867,
-      "learning_rate": 1.584788029925187e-05,
-      "loss": 0.4193,
-      "step": 3000
-    },
-    {
-      "epoch": 0.7273482959268496,
-      "grad_norm": 1.0217444896697998,
-      "learning_rate": 1.5155167636464397e-05,
-      "loss": 0.4215,
-      "step": 3500
-    },
-    {
-      "epoch": 0.8312551953449709,
-      "grad_norm": 1.6476292610168457,
-      "learning_rate": 1.446245497367692e-05,
-      "loss": 0.4062,
-      "step": 4000
-    },
-    {
-      "epoch": 0.9351620947630923,
-      "grad_norm": 1.4694277048110962,
-      "learning_rate": 1.3769742310889445e-05,
-      "loss": 0.4029,
-      "step": 4500
-    },
-    {
-      "epoch": 1.0,
-      "eval_loss": 0.3212089240550995,
-      "eval_runtime": 31.4577,
-      "eval_samples_per_second": 15.704,
-      "eval_steps_per_second": 7.852,
-      "step": 4812
-    },
-    {
-      "epoch": 1.0390689941812137,
-      "grad_norm": 0.6561925411224365,
-      "learning_rate": 1.3077029648101969e-05,
-      "loss": 0.4059,
-      "step": 5000
-    },
-    {
-      "epoch": 1.142975893599335,
-      "grad_norm": 0.6741281747817993,
-      "learning_rate": 1.2384316985314493e-05,
-      "loss": 0.3594,
-      "step": 5500
-    },
-    {
-      "epoch": 1.2468827930174564,
-      "grad_norm": 0.48619207739830017,
-      "learning_rate": 1.1691604322527017e-05,
-      "loss": 0.3736,
-      "step": 6000
-    },
-    {
-      "epoch": 1.3507896924355778,
-      "grad_norm": 1.1009119749069214,
-      "learning_rate": 1.099889165973954e-05,
-      "loss": 0.3624,
-      "step": 6500
-    },
-    {
-      "epoch": 1.4546965918536992,
-      "grad_norm": 0.3497615456581116,
-      "learning_rate": 1.0306178996952066e-05,
-      "loss": 0.3516,
-      "step": 7000
-    },
-    {
-      "epoch": 1.5586034912718203,
-      "grad_norm": 1.4209001064300537,
-      "learning_rate": 9.61346633416459e-06,
-      "loss": 0.3565,
-      "step": 7500
-    },
-    {
-      "epoch": 1.6625103906899419,
-      "grad_norm": 0.8116744160652161,
-      "learning_rate": 8.920753671377114e-06,
-      "loss": 0.3635,
-      "step": 8000
-    },
-    {
-      "epoch": 1.766417290108063,
-      "grad_norm": 0.8015578985214233,
-      "learning_rate": 8.228041008589638e-06,
-      "loss": 0.3549,
-      "step": 8500
-    },
-    {
-      "epoch": 1.8703241895261846,
-      "grad_norm": 0.7980790734291077,
-      "learning_rate": 7.536713771127737e-06,
-      "loss": 0.3495,
-      "step": 9000
-    },
-    {
-      "epoch": 1.9742310889443058,
-      "grad_norm": 1.4501579999923706,
-      "learning_rate": 6.845386533665836e-06,
-      "loss": 0.3385,
-      "step": 9500
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 0.29613471031188965,
-      "eval_runtime": 31.4504,
-      "eval_samples_per_second": 15.707,
-      "eval_steps_per_second": 7.854,
-      "step": 9624
-    },
-    {
-      "epoch": 2.0781379883624274,
-      "grad_norm": 0.6130263209342957,
-      "learning_rate": 6.15267387087836e-06,
-      "loss": 0.3293,
-      "step": 10000
-    },
-    {
-      "epoch": 2.1820448877805485,
-      "grad_norm": 1.2724053859710693,
-      "learning_rate": 5.459961208090885e-06,
-      "loss": 0.3369,
-      "step": 10500
-    },
-    {
-      "epoch": 2.28595178719867,
-      "grad_norm": 0.7700533270835876,
-      "learning_rate": 4.767248545303408e-06,
-      "loss": 0.3387,
-      "step": 11000
-    },
-    {
-      "epoch": 2.3898586866167912,
-      "grad_norm": 1.4450799226760864,
-      "learning_rate": 4.0759213078415074e-06,
-      "loss": 0.3411,
-      "step": 11500
-    },
-    {
-      "epoch": 2.493765586034913,
-      "grad_norm": 0.8265316486358643,
-      "learning_rate": 3.3832086450540318e-06,
-      "loss": 0.317,
-      "step": 12000
-    },
-    {
-      "epoch": 2.597672485453034,
-      "grad_norm": 1.4168757200241089,
-      "learning_rate": 2.690495982266556e-06,
-      "loss": 0.3176,
-      "step": 12500
-    },
-    {
-      "epoch": 2.7015793848711556,
-      "grad_norm": 1.0274338722229004,
-      "learning_rate": 1.99778331947908e-06,
-      "loss": 0.3389,
-      "step": 13000
-    },
-    {
-      "epoch": 2.8054862842892767,
-      "grad_norm": 1.6529736518859863,
-      "learning_rate": 1.3050706566916044e-06,
-      "loss": 0.3163,
-      "step": 13500
-    },
-    {
-      "epoch": 2.9093931837073983,
-      "grad_norm": 0.7652114629745483,
-      "learning_rate": 6.123579939041286e-07,
-      "loss": 0.3267,
-      "step": 14000
     }
   ],
   "logging_steps": 500,
@@ -233,12 +28,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.758000534257664e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10390689941812137,
   "eval_steps": 500,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.10390689941812137,
+      "grad_norm": 3.488266944885254,
+      "learning_rate": 1.9311443613189252e-05,
+      "loss": 0.8436,
       "step": 500
     }
   ],
   "logging_steps": 500,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 608957890560000.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:458592354564e0c56775e9ca6e222e6e9b0676dd27e2547cdd1181aaf3301fdb
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f1dbee35c77e893d48b1da8b35396070180d26658b9fe560d8ed118dfc2d009
 size 5432