Training in progress, step 1240, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +47 -3
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2433024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a7a5bf2ae53fe913692899a49310566217dbeffbb0199ff9046888fb5aa7575
|
3 |
size 2433024
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2498406
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b51ee5c39f2196a17052a3369869248cbff6bb81824726e673d7bacb4845000
|
3 |
size 2498406
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a69f6ea192cfcf54ef28bb2d6bd807102ec49e655245cd88a8999a1025beaa56
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd5bf2b6b2f2b239cbf378c9ab279dbd312ccd339731dbb40b178c5d2117f1fa
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc3d6b5de9f89ae92adeb3ebb45551760dedacf84ef470629b9572c81c7e5e15
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a659c65d62a0ad4ef2400d4a9efa53bb95f33c9a3510b01c06df806373098fd
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7945d23ad3f18f8d933786bc7ca2663bfc4a07a89b86602528c15c8de525cba4
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3564e5e1fb5118c052847f294bdd8157abfa1387eac82f725487e3fb9285512
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd61a764da978cdc05855ec977013888fcda81d8e3f693f0b43c1fa27207c8cf
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c21d6eb44e4c3a4a2aa63996253ecd1b46d24a99d863d3828abc8a3058eb6753
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd12f9f53e74c951a51712fa65ed54e535568d6d68567892b5b94a3a28396fca
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1335,6 +1335,50 @@
|
|
1335 |
"eval_samples_per_second": 423.301,
|
1336 |
"eval_steps_per_second": 26.492,
|
1337 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1338 |
}
|
1339 |
],
|
1340 |
"logging_steps": 10,
|
@@ -1354,7 +1398,7 @@
|
|
1354 |
"attributes": {}
|
1355 |
}
|
1356 |
},
|
1357 |
-
"total_flos":
|
1358 |
"train_batch_size": 2,
|
1359 |
"trial_name": null,
|
1360 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.467455621301775,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 1240,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1335 |
"eval_samples_per_second": 423.301,
|
1336 |
"eval_steps_per_second": 26.492,
|
1337 |
"step": 1200
|
1338 |
+
},
|
1339 |
+
{
|
1340 |
+
"epoch": 1.4319526627218935,
|
1341 |
+
"grad_norm": 0.451171875,
|
1342 |
+
"learning_rate": 0.0001135169494631497,
|
1343 |
+
"loss": 10.4216,
|
1344 |
+
"step": 1210
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"epoch": 1.4437869822485208,
|
1348 |
+
"grad_norm": 0.62890625,
|
1349 |
+
"learning_rate": 0.00011220516908034601,
|
1350 |
+
"loss": 10.4063,
|
1351 |
+
"step": 1220
|
1352 |
+
},
|
1353 |
+
{
|
1354 |
+
"epoch": 1.4437869822485208,
|
1355 |
+
"eval_loss": 10.49565315246582,
|
1356 |
+
"eval_runtime": 3.5534,
|
1357 |
+
"eval_samples_per_second": 422.698,
|
1358 |
+
"eval_steps_per_second": 26.454,
|
1359 |
+
"step": 1220
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"epoch": 1.4556213017751478,
|
1363 |
+
"grad_norm": 0.5,
|
1364 |
+
"learning_rate": 0.00011089125314635726,
|
1365 |
+
"loss": 10.4023,
|
1366 |
+
"step": 1230
|
1367 |
+
},
|
1368 |
+
{
|
1369 |
+
"epoch": 1.467455621301775,
|
1370 |
+
"grad_norm": 0.494140625,
|
1371 |
+
"learning_rate": 0.00010957543155842702,
|
1372 |
+
"loss": 10.3866,
|
1373 |
+
"step": 1240
|
1374 |
+
},
|
1375 |
+
{
|
1376 |
+
"epoch": 1.467455621301775,
|
1377 |
+
"eval_loss": 10.491253852844238,
|
1378 |
+
"eval_runtime": 3.6425,
|
1379 |
+
"eval_samples_per_second": 412.352,
|
1380 |
+
"eval_steps_per_second": 25.806,
|
1381 |
+
"step": 1240
|
1382 |
}
|
1383 |
],
|
1384 |
"logging_steps": 10,
|
|
|
1398 |
"attributes": {}
|
1399 |
}
|
1400 |
},
|
1401 |
+
"total_flos": 485882265600.0,
|
1402 |
"train_batch_size": 2,
|
1403 |
"trial_name": null,
|
1404 |
"trial_params": null
|