Training in progress, step 54400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1241,6 +1241,10 @@ You can finetune this model on your own dataset.
|
|
1241 |
| 0.9560 | 54100 | 0.3496 |
|
1242 |
| 0.9569 | 54150 | 0.2609 |
|
1243 |
| 0.9577 | 54200 | 0.3942 |
|
|
|
|
|
|
|
|
|
1244 |
|
1245 |
|
1246 |
### Framework Versions
|
|
|
1241 |
| 0.9560 | 54100 | 0.3496 |
|
1242 |
| 0.9569 | 54150 | 0.2609 |
|
1243 |
| 0.9577 | 54200 | 0.3942 |
|
1244 |
+
| 0.9586 | 54250 | 0.3794 |
|
1245 |
+
| 0.9595 | 54300 | 0.2834 |
|
1246 |
+
| 0.9604 | 54350 | 0.3271 |
|
1247 |
+
| 0.9613 | 54400 | 0.3048 |
|
1248 |
|
1249 |
|
1250 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fcfe428b137d6be6e57435ee624e959e084b7aee0c0d6305bb7c44169a76f60
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:741ae581a8dbe07cedeb95e8dcf3b75fa2e45d451017ebafac3dd8de4a759fd3
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d18b9a5d8e4821cc664cde79b567aa4a381dd9a2d469a4b24834ee49788c1be1
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a37f9bfdc7ecebb937aa8d7e396f6a300da6da1986d237a8a3a3f1a8fc4e0d2b
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c578b2def8a7ed655790145b2a44449f69c54bd690e5b820438921a87509936b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7596,6 +7596,34 @@
|
|
7596 |
"learning_rate": 2.3688912450177693e-06,
|
7597 |
"loss": 0.3942,
|
7598 |
"step": 54200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7599 |
}
|
7600 |
],
|
7601 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9612835963315721,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 54400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7596 |
"learning_rate": 2.3688912450177693e-06,
|
7597 |
"loss": 0.3942,
|
7598 |
"step": 54200
|
7599 |
+
},
|
7600 |
+
{
|
7601 |
+
"epoch": 0.9586329981799226,
|
7602 |
+
"grad_norm": 1.4870383739471436,
|
7603 |
+
"learning_rate": 2.319805226679233e-06,
|
7604 |
+
"loss": 0.3794,
|
7605 |
+
"step": 54250
|
7606 |
+
},
|
7607 |
+
{
|
7608 |
+
"epoch": 0.9595165308971392,
|
7609 |
+
"grad_norm": 3.1990461349487305,
|
7610 |
+
"learning_rate": 2.2707192083406965e-06,
|
7611 |
+
"loss": 0.2834,
|
7612 |
+
"step": 54300
|
7613 |
+
},
|
7614 |
+
{
|
7615 |
+
"epoch": 0.9604000636143556,
|
7616 |
+
"grad_norm": 1.178895115852356,
|
7617 |
+
"learning_rate": 2.22163319000216e-06,
|
7618 |
+
"loss": 0.3271,
|
7619 |
+
"step": 54350
|
7620 |
+
},
|
7621 |
+
{
|
7622 |
+
"epoch": 0.9612835963315721,
|
7623 |
+
"grad_norm": 1.724674105644226,
|
7624 |
+
"learning_rate": 2.1725471716636233e-06,
|
7625 |
+
"loss": 0.3048,
|
7626 |
+
"step": 54400
|
7627 |
}
|
7628 |
],
|
7629 |
"logging_steps": 50,
|