Training in progress, step 56200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1277,6 +1277,10 @@ You can finetune this model on your own dataset.
|
|
1277 |
| 0.9878 | 55900 | 0.241 |
|
1278 |
| 0.9887 | 55950 | 0.2616 |
|
1279 |
| 0.9896 | 56000 | 0.2572 |
|
|
|
|
|
|
|
|
|
1280 |
|
1281 |
|
1282 |
### Framework Versions
|
|
|
1277 |
| 0.9878 | 55900 | 0.241 |
|
1278 |
| 0.9887 | 55950 | 0.2616 |
|
1279 |
| 0.9896 | 56000 | 0.2572 |
|
1280 |
+
| 0.9904 | 56050 | 0.3437 |
|
1281 |
+
| 0.9913 | 56100 | 0.2681 |
|
1282 |
+
| 0.9922 | 56150 | 0.2366 |
|
1283 |
+
| 0.9931 | 56200 | 0.2505 |
|
1284 |
|
1285 |
|
1286 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a32dcecc7a280e374129a3cf22989595ed2fddc46218250361b6a75860e81ea
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e902c9d495246ac0efa5a76c4a3f41b5141efdc728e2bf3190fe35a55a5b2e1
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7a835c96aac6cd628ade43289fa29f54a47dd56e61db80b153e797a52ab69e8
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b83e73e3f126bc90ca6d8e599f4164120b914905a3300fff248d49cf0cac104
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10cac6c490bb3f5b4fbbd2a825be2153a2b1171fe6ae7ae8be377957525cab19
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7848,6 +7848,34 @@
|
|
7848 |
"learning_rate": 6.027763051972277e-07,
|
7849 |
"loss": 0.2572,
|
7850 |
"step": 56000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7851 |
}
|
7852 |
],
|
7853 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9930907741513668,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 56200,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7848 |
"learning_rate": 6.027763051972277e-07,
|
7849 |
"loss": 0.2572,
|
7850 |
"step": 56000
|
7851 |
+
},
|
7852 |
+
{
|
7853 |
+
"epoch": 0.9904401759997172,
|
7854 |
+
"grad_norm": 1.814112663269043,
|
7855 |
+
"learning_rate": 5.536902868586912e-07,
|
7856 |
+
"loss": 0.3437,
|
7857 |
+
"step": 56050
|
7858 |
+
},
|
7859 |
+
{
|
7860 |
+
"epoch": 0.9913237087169338,
|
7861 |
+
"grad_norm": 2.0368192195892334,
|
7862 |
+
"learning_rate": 5.046042685201547e-07,
|
7863 |
+
"loss": 0.2681,
|
7864 |
+
"step": 56100
|
7865 |
+
},
|
7866 |
+
{
|
7867 |
+
"epoch": 0.9922072414341503,
|
7868 |
+
"grad_norm": 1.4389891624450684,
|
7869 |
+
"learning_rate": 4.555182501816183e-07,
|
7870 |
+
"loss": 0.2366,
|
7871 |
+
"step": 56150
|
7872 |
+
},
|
7873 |
+
{
|
7874 |
+
"epoch": 0.9930907741513668,
|
7875 |
+
"grad_norm": 2.772890567779541,
|
7876 |
+
"learning_rate": 4.0643223184308187e-07,
|
7877 |
+
"loss": 0.2505,
|
7878 |
+
"step": 56200
|
7879 |
}
|
7880 |
],
|
7881 |
"logging_steps": 50,
|