Training in progress, step 1280, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +47 -3
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2433024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76da35fcd2eceb4682b02e4d6f4efd1f654fac0b25429d515156667dd817308a
|
3 |
size 2433024
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2498406
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f660c44323f7dc691c0421faf42c40cc765dca5c21d11bff643a623b003967a0
|
3 |
size 2498406
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b3ba644702b51ab6ceb3cfb78b0712e10c56d7a898133b9cf775673605d71a6
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a575d13944792443c2e0faf6af7562460ac52c5628f17b918747a2fad55be01
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3378545ae3c0bb849e88bd71db9a81f0556e610f7a1d6ea4af902425e910afa6
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3581eff136704b2f763bfac1cfe6d4ca215660e05f32de8938f8c598bdd09e1
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4710f39f928214c6084305a96ed4d69309abcb477c5fe9ea3b79644e92349f58
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17f2040b06510c4ed4dae6986e8f9b63891a60d4b9fa2e8045fc74b430abf05b
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f0418024306b84efbd06ecb22274609094ccd51161118b224af78a4c9aa3c2e
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d46bb0bbf258912802dbc17d20db208430fc3ab3a923169293e1cbe07fb7ae7
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25cf578d94ac1a7be20caf3c6bf3d856ece0554beb81a56caa6f17d994e34988
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1379,6 +1379,50 @@
|
|
1379 |
"eval_samples_per_second": 412.352,
|
1380 |
"eval_steps_per_second": 25.806,
|
1381 |
"step": 1240
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1382 |
}
|
1383 |
],
|
1384 |
"logging_steps": 10,
|
@@ -1398,7 +1442,7 @@
|
|
1398 |
"attributes": {}
|
1399 |
}
|
1400 |
},
|
1401 |
-
"total_flos":
|
1402 |
"train_batch_size": 2,
|
1403 |
"trial_name": null,
|
1404 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.514792899408284,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 1280,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1379 |
"eval_samples_per_second": 412.352,
|
1380 |
"eval_steps_per_second": 25.806,
|
1381 |
"step": 1240
|
1382 |
+
},
|
1383 |
+
{
|
1384 |
+
"epoch": 1.4792899408284024,
|
1385 |
+
"grad_norm": 0.51171875,
|
1386 |
+
"learning_rate": 0.00010825793454723325,
|
1387 |
+
"loss": 10.4014,
|
1388 |
+
"step": 1250
|
1389 |
+
},
|
1390 |
+
{
|
1391 |
+
"epoch": 1.4911242603550297,
|
1392 |
+
"grad_norm": 0.453125,
|
1393 |
+
"learning_rate": 0.00010693899263660441,
|
1394 |
+
"loss": 10.4494,
|
1395 |
+
"step": 1260
|
1396 |
+
},
|
1397 |
+
{
|
1398 |
+
"epoch": 1.4911242603550297,
|
1399 |
+
"eval_loss": 10.489725112915039,
|
1400 |
+
"eval_runtime": 3.7506,
|
1401 |
+
"eval_samples_per_second": 400.469,
|
1402 |
+
"eval_steps_per_second": 25.063,
|
1403 |
+
"step": 1260
|
1404 |
+
},
|
1405 |
+
{
|
1406 |
+
"epoch": 1.502958579881657,
|
1407 |
+
"grad_norm": 0.66015625,
|
1408 |
+
"learning_rate": 0.00010561883660318455,
|
1409 |
+
"loss": 10.4229,
|
1410 |
+
"step": 1270
|
1411 |
+
},
|
1412 |
+
{
|
1413 |
+
"epoch": 1.514792899408284,
|
1414 |
+
"grad_norm": 0.5,
|
1415 |
+
"learning_rate": 0.00010429769743605407,
|
1416 |
+
"loss": 10.4095,
|
1417 |
+
"step": 1280
|
1418 |
+
},
|
1419 |
+
{
|
1420 |
+
"epoch": 1.514792899408284,
|
1421 |
+
"eval_loss": 10.488317489624023,
|
1422 |
+
"eval_runtime": 3.5347,
|
1423 |
+
"eval_samples_per_second": 424.926,
|
1424 |
+
"eval_steps_per_second": 26.593,
|
1425 |
+
"step": 1280
|
1426 |
}
|
1427 |
],
|
1428 |
"logging_steps": 10,
|
|
|
1442 |
"attributes": {}
|
1443 |
}
|
1444 |
},
|
1445 |
+
"total_flos": 501547991040.0,
|
1446 |
"train_batch_size": 2,
|
1447 |
"trial_name": null,
|
1448 |
"trial_params": null
|