Training in progress, step 53200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1217,6 +1217,10 @@ You can finetune this model on your own dataset.
|
|
1217 |
| 0.9348 | 52900 | 0.2546 |
|
1218 |
| 0.9357 | 52950 | 0.2433 |
|
1219 |
| 0.9365 | 53000 | 0.3632 |
|
|
|
|
|
|
|
|
|
1220 |
|
1221 |
|
1222 |
### Framework Versions
|
|
|
1217 |
| 0.9348 | 52900 | 0.2546 |
|
1218 |
| 0.9357 | 52950 | 0.2433 |
|
1219 |
| 0.9365 | 53000 | 0.3632 |
|
1220 |
+
| 0.9374 | 53050 | 0.2602 |
|
1221 |
+
| 0.9383 | 53100 | 0.2066 |
|
1222 |
+
| 0.9392 | 53150 | 0.2957 |
|
1223 |
+
| 0.9401 | 53200 | 0.3931 |
|
1224 |
|
1225 |
|
1226 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2487922e51e359a9f1ecb6a445d3e636440e65c32739f52f813e51dba1e67cff
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb6a3019423076f9d2bcde60bdb5789ba741f63b281497780b4c7a5a01452766
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:703c575158d027ee9439c8f4b7cba9394179cb8e338f46e41a82b465fd641366
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d78afd0a4dfa1ad614feedb884bc52a6d0ef53d711277b9eb7759daf275b74c
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c6628a730df5612a115afdb07517d1848beb0c28c4d9cd90e5531c3a4536e32
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7428,6 +7428,34 @@
|
|
7428 |
"learning_rate": 3.5459739647758733e-06,
|
7429 |
"loss": 0.3632,
|
7430 |
"step": 53000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7431 |
}
|
7432 |
],
|
7433 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9400788111183758,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 53200,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7428 |
"learning_rate": 3.5459739647758733e-06,
|
7429 |
"loss": 0.3632,
|
7430 |
"step": 53000
|
7431 |
+
},
|
7432 |
+
{
|
7433 |
+
"epoch": 0.9374282129667262,
|
7434 |
+
"grad_norm": 2.5942931175231934,
|
7435 |
+
"learning_rate": 3.496887946437337e-06,
|
7436 |
+
"loss": 0.2602,
|
7437 |
+
"step": 53050
|
7438 |
+
},
|
7439 |
+
{
|
7440 |
+
"epoch": 0.9383117456839427,
|
7441 |
+
"grad_norm": 1.256638526916504,
|
7442 |
+
"learning_rate": 3.4478019280988005e-06,
|
7443 |
+
"loss": 0.2066,
|
7444 |
+
"step": 53100
|
7445 |
+
},
|
7446 |
+
{
|
7447 |
+
"epoch": 0.9391952784011592,
|
7448 |
+
"grad_norm": 3.677544593811035,
|
7449 |
+
"learning_rate": 3.399697630127035e-06,
|
7450 |
+
"loss": 0.2957,
|
7451 |
+
"step": 53150
|
7452 |
+
},
|
7453 |
+
{
|
7454 |
+
"epoch": 0.9400788111183758,
|
7455 |
+
"grad_norm": 1.3518919944763184,
|
7456 |
+
"learning_rate": 3.3506116117884983e-06,
|
7457 |
+
"loss": 0.3931,
|
7458 |
+
"step": 53200
|
7459 |
}
|
7460 |
],
|
7461 |
"logging_steps": 50,
|