SystemAdmin123 committed
Commit c71e1cf · verified · 1 Parent(s): 9d5e616

Training in progress, step 1280, checkpoint

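To inspect these files locally, the checkpoint can be fetched from the Hub pinned to this exact commit. The sketch below is minimal and hypothetical: it assumes the `huggingface_hub` client, the `repo_id` is a placeholder (the repository name is not shown on this page), and only the short commit hash `c71e1cf` comes from the header above.

```python
from huggingface_hub import snapshot_download

# Fetch only the files under last-checkpoint/ at this commit.
# repo_id is a HYPOTHETICAL placeholder -- replace with the real repository.
local_dir = snapshot_download(
    repo_id="SystemAdmin123/<repo-name>",
    revision="c71e1cf",                      # short hash of this commit
    allow_patterns=["last-checkpoint/*"],
)
print("checkpoint downloaded to", local_dir)
```
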
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7a7a5bf2ae53fe913692899a49310566217dbeffbb0199ff9046888fb5aa7575
+ oid sha256:76da35fcd2eceb4682b02e4d6f4efd1f654fac0b25429d515156667dd817308a
  size 2433024

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5b51ee5c39f2196a17052a3369869248cbff6bb81824726e673d7bacb4845000
+ oid sha256:f660c44323f7dc691c0421faf42c40cc765dca5c21d11bff643a623b003967a0
  size 2498406

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a69f6ea192cfcf54ef28bb2d6bd807102ec49e655245cd88a8999a1025beaa56
+ oid sha256:1b3ba644702b51ab6ceb3cfb78b0712e10c56d7a898133b9cf775673605d71a6
  size 15984

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cd5bf2b6b2f2b239cbf378c9ab279dbd312ccd339731dbb40b178c5d2117f1fa
+ oid sha256:7a575d13944792443c2e0faf6af7562460ac52c5628f17b918747a2fad55be01
  size 15984

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bc3d6b5de9f89ae92adeb3ebb45551760dedacf84ef470629b9572c81c7e5e15
+ oid sha256:3378545ae3c0bb849e88bd71db9a81f0556e610f7a1d6ea4af902425e910afa6
  size 15984

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a659c65d62a0ad4ef2400d4a9efa53bb95f33c9a3510b01c06df806373098fd
+ oid sha256:d3581eff136704b2f763bfac1cfe6d4ca215660e05f32de8938f8c598bdd09e1
  size 15984

last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7945d23ad3f18f8d933786bc7ca2663bfc4a07a89b86602528c15c8de525cba4
+ oid sha256:4710f39f928214c6084305a96ed4d69309abcb477c5fe9ea3b79644e92349f58
  size 15984

last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e3564e5e1fb5118c052847f294bdd8157abfa1387eac82f725487e3fb9285512
+ oid sha256:17f2040b06510c4ed4dae6986e8f9b63891a60d4b9fa2e8045fc74b430abf05b
  size 15984

last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cd61a764da978cdc05855ec977013888fcda81d8e3f693f0b43c1fa27207c8cf
+ oid sha256:3f0418024306b84efbd06ecb22274609094ccd51161118b224af78a4c9aa3c2e
  size 15984

last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c21d6eb44e4c3a4a2aa63996253ecd1b46d24a99d863d3828abc8a3058eb6753
+ oid sha256:9d46bb0bbf258912802dbc17d20db208430fc3ab3a923169293e1cbe07fb7ae7
  size 15984

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cd12f9f53e74c951a51712fa65ed54e535568d6d68567892b5b94a3a28396fca
+ oid sha256:25cf578d94ac1a7be20caf3c6bf3d856ece0554beb81a56caa6f17d994e34988
  size 1064

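Each block above is a Git LFS pointer file, not the binary itself: the pointer records the spec version, the sha256 `oid` of the tracked blob, and its byte size, so overwriting a checkpoint of identical size changes only the `oid` line. A quick way to confirm that a locally downloaded blob matches the new pointer is to recompute that digest; here is a minimal sketch, assuming the file sits at the path shown in the diff:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the sha256 hex digest that Git LFS stores as the pointer's oid."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Assumed local path; after this commit the digest should be
# 76da35fcd2eceb4682b02e4d6f4efd1f654fac0b25429d515156667dd817308a
print(lfs_oid("last-checkpoint/model.safetensors"))
```
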
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.467455621301775,
+ "epoch": 1.514792899408284,
  "eval_steps": 20,
- "global_step": 1240,
+ "global_step": 1280,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1379,6 +1379,50 @@
  "eval_samples_per_second": 412.352,
  "eval_steps_per_second": 25.806,
  "step": 1240
+ },
+ {
+ "epoch": 1.4792899408284024,
+ "grad_norm": 0.51171875,
+ "learning_rate": 0.00010825793454723325,
+ "loss": 10.4014,
+ "step": 1250
+ },
+ {
+ "epoch": 1.4911242603550297,
+ "grad_norm": 0.453125,
+ "learning_rate": 0.00010693899263660441,
+ "loss": 10.4494,
+ "step": 1260
+ },
+ {
+ "epoch": 1.4911242603550297,
+ "eval_loss": 10.489725112915039,
+ "eval_runtime": 3.7506,
+ "eval_samples_per_second": 400.469,
+ "eval_steps_per_second": 25.063,
+ "step": 1260
+ },
+ {
+ "epoch": 1.502958579881657,
+ "grad_norm": 0.66015625,
+ "learning_rate": 0.00010561883660318455,
+ "loss": 10.4229,
+ "step": 1270
+ },
+ {
+ "epoch": 1.514792899408284,
+ "grad_norm": 0.5,
+ "learning_rate": 0.00010429769743605407,
+ "loss": 10.4095,
+ "step": 1280
+ },
+ {
+ "epoch": 1.514792899408284,
+ "eval_loss": 10.488317489624023,
+ "eval_runtime": 3.5347,
+ "eval_samples_per_second": 424.926,
+ "eval_steps_per_second": 26.593,
+ "step": 1280
  }
  ],
  "logging_steps": 10,
@@ -1398,7 +1442,7 @@
  "attributes": {}
  }
  },
- "total_flos": 485882265600.0,
+ "total_flos": 501547991040.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null