SystemAdmin123 committed
Commit 6d1c604 · verified · 1 parent: 783e8da

Training in progress, step 1240, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22aa8c039d0be9a3c988facf84625571134d37cda476dfb65d98532416555fe4
+oid sha256:7a7a5bf2ae53fe913692899a49310566217dbeffbb0199ff9046888fb5aa7575
 size 2433024
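
All of the checkpoint files in this commit are tracked with Git LFS, so each diff only touches the three-line pointer (version, oid, size); the binary payloads live in LFS storage. After pulling the new revision, the sha256 of the local model.safetensors should equal the oid in the updated pointer. A minimal verification sketch in Python, assuming a local copy at last-checkpoint/model.safetensors (the path is an assumption; the expected digest is the new oid from this commit):

import hashlib

# New pointer oid from this commit (model.safetensors).
EXPECTED_OID = "7a7a5bf2ae53fe913692899a49310566217dbeffbb0199ff9046888fb5aa7575"

def sha256_of(path, chunk_size=1 << 20):
    """Stream the file in 1 MiB chunks so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

if __name__ == "__main__":
    digest = sha256_of("last-checkpoint/model.safetensors")  # assumed local path
    print("match" if digest == EXPECTED_OID else f"mismatch: {digest}")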
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff4dbb252d5ebeffba6f21f8f84c970e70325f1ed27556618e23cfc59b40ad4f
+oid sha256:5b51ee5c39f2196a17052a3369869248cbff6bb81824726e673d7bacb4845000
 size 2498406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ca8a36414e90dd5ba456c333f960e4889ffb54999d5803cb989c580c8cd717
+oid sha256:a69f6ea192cfcf54ef28bb2d6bd807102ec49e655245cd88a8999a1025beaa56
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f0d401f661ee31f2b765dcd2b32c58ff6fac5ec7dffb7f7ab574aa7b39adad9
+oid sha256:cd5bf2b6b2f2b239cbf378c9ab279dbd312ccd339731dbb40b178c5d2117f1fa
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:852aa6ce51e35f784a4dc0ec9d5711e42c32f8d80ac14d6387fd99a96cb20be9
+oid sha256:bc3d6b5de9f89ae92adeb3ebb45551760dedacf84ef470629b9572c81c7e5e15
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed8b4821ecf65ce0ec23d8413389d2f0386e72d57248abf4db5d646f8e2d49fb
+oid sha256:8a659c65d62a0ad4ef2400d4a9efa53bb95f33c9a3510b01c06df806373098fd
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca81e9e996d0a71d202120d863a18b550eb8c713480df1e52307bfd9b3cc6ce3
+oid sha256:7945d23ad3f18f8d933786bc7ca2663bfc4a07a89b86602528c15c8de525cba4
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a81c9d10706052a5945cb04e2d871e4de45e07953c5c957c59f663d6b1cb94f
+oid sha256:e3564e5e1fb5118c052847f294bdd8157abfa1387eac82f725487e3fb9285512
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d62eb2d573e4666268810fad55dc5ad08bca6def78acee1ff9463881b2cf7628
+oid sha256:cd61a764da978cdc05855ec977013888fcda81d8e3f693f0b43c1fa27207c8cf
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11f10650ff981eaf0f064d2744bd6a541b50f4a191760c1885276196f11a060b
+oid sha256:c21d6eb44e4c3a4a2aa63996253ecd1b46d24a99d863d3828abc8a3058eb6753
 size 15984
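
The eight rng_state_{0..7}.pth pointers above correspond to one RNG snapshot per training process, which suggests an 8-way data-parallel run; the Trainer restores them on resume so shuffling and dropout streams continue deterministically. A minimal inspection sketch, assuming a local copy of the checkpoint (the exact dict contents depend on the transformers version, so nothing beyond "it is a dict" is assumed):

import torch

# Load one rank's RNG snapshot. weights_only=False is needed on newer PyTorch,
# where the default changed, because the snapshot contains non-tensor Python
# objects (e.g. numpy RNG state tuples).
rng = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu", weights_only=False)

# Recent Trainer versions save a plain dict (typically python/numpy/cpu/cuda
# states), but the keys are version-dependent, so just list what is there.
for key, value in rng.items():
    print(f"{key}: {type(value).__name__}")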
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:427276ae77d918ee2b880ea4152618640d39ea76588856ca2cd62fe2ab8b83d7
+oid sha256:cd12f9f53e74c951a51712fa65ed54e535568d6d68567892b5b94a3a28396fca
 size 1064
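
Together with the weights, optimizer, and RNG states above, this directory is what trainer.train(resume_from_checkpoint=...) consumes to continue the run, and its learning-rate decay, from step 1240 rather than restarting. A quick completeness check before resuming, assuming a local copy named last-checkpoint (the path is an assumption; the file list is taken from this commit):

import os

CKPT = "last-checkpoint"  # assumed local path to the checkpoint directory
expected = [
    "model.safetensors", "optimizer.pt", "scheduler.pt", "trainer_state.json",
] + [f"rng_state_{rank}.pth" for rank in range(8)]

missing = [name for name in expected if not os.path.isfile(os.path.join(CKPT, name))]
print("checkpoint complete" if not missing else f"missing files: {missing}")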
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.4201183431952662,
+  "epoch": 1.467455621301775,
   "eval_steps": 20,
-  "global_step": 1200,
+  "global_step": 1240,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1335,6 +1335,50 @@
       "eval_samples_per_second": 423.301,
       "eval_steps_per_second": 26.492,
       "step": 1200
+    },
+    {
+      "epoch": 1.4319526627218935,
+      "grad_norm": 0.451171875,
+      "learning_rate": 0.0001135169494631497,
+      "loss": 10.4216,
+      "step": 1210
+    },
+    {
+      "epoch": 1.4437869822485208,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.00011220516908034601,
+      "loss": 10.4063,
+      "step": 1220
+    },
+    {
+      "epoch": 1.4437869822485208,
+      "eval_loss": 10.49565315246582,
+      "eval_runtime": 3.5534,
+      "eval_samples_per_second": 422.698,
+      "eval_steps_per_second": 26.454,
+      "step": 1220
+    },
+    {
+      "epoch": 1.4556213017751478,
+      "grad_norm": 0.5,
+      "learning_rate": 0.00011089125314635726,
+      "loss": 10.4023,
+      "step": 1230
+    },
+    {
+      "epoch": 1.467455621301775,
+      "grad_norm": 0.494140625,
+      "learning_rate": 0.00010957543155842702,
+      "loss": 10.3866,
+      "step": 1240
+    },
+    {
+      "epoch": 1.467455621301775,
+      "eval_loss": 10.491253852844238,
+      "eval_runtime": 3.6425,
+      "eval_samples_per_second": 412.352,
+      "eval_steps_per_second": 25.806,
+      "step": 1240
     }
   ],
   "logging_steps": 10,
@@ -1354,7 +1398,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 470216540160.0,
+  "total_flos": 485882265600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null