Training in progress, step 56400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1281,6 +1281,10 @@ You can finetune this model on your own dataset.
|
|
1281 |
| 0.9913 | 56100 | 0.2681 |
|
1282 |
| 0.9922 | 56150 | 0.2366 |
|
1283 |
| 0.9931 | 56200 | 0.2505 |
|
|
|
|
|
|
|
|
|
1284 |
|
1285 |
|
1286 |
### Framework Versions
|
|
|
1281 |
| 0.9913 | 56100 | 0.2681 |
|
1282 |
| 0.9922 | 56150 | 0.2366 |
|
1283 |
| 0.9931 | 56200 | 0.2505 |
|
1284 |
+
| 0.9940 | 56250 | 0.3341 |
|
1285 |
+
| 0.9949 | 56300 | 0.3292 |
|
1286 |
+
| 0.9957 | 56350 | 0.3199 |
|
1287 |
+
| 0.9966 | 56400 | 0.284 |
|
1288 |
|
1289 |
|
1290 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4747cc93084121a20f52f8b5b3fae00c2031a6d438b4dd7c030dc7e8652c32aa
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a01f3f6168f288a9f54b715fca053c4247ee3c93290ec1fa8d7523534d20340
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85f4a9b315b1a20d2d787ee19f75f3a9099eeab81535c38061478be96f3a2307
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:122be002ed2c68d1f49408fb696a88fb75bec95372b6d4e67c49fa667ddaf90a
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da795e3f463af8c3f55b378fcdaabc95ba9e2728665cb3652b390ac2ddc09ef8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7876,6 +7876,34 @@
|
|
7876 |
"learning_rate": 4.0643223184308187e-07,
|
7877 |
"loss": 0.2505,
|
7878 |
"step": 56200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7879 |
}
|
7880 |
],
|
7881 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9966249050202329,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 56400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7876 |
"learning_rate": 4.0643223184308187e-07,
|
7877 |
"loss": 0.2505,
|
7878 |
"step": 56200
|
7879 |
+
},
|
7880 |
+
{
|
7881 |
+
"epoch": 0.9939743068685833,
|
7882 |
+
"grad_norm": 1.548779010772705,
|
7883 |
+
"learning_rate": 3.5734621350454537e-07,
|
7884 |
+
"loss": 0.3341,
|
7885 |
+
"step": 56250
|
7886 |
+
},
|
7887 |
+
{
|
7888 |
+
"epoch": 0.9948578395857999,
|
7889 |
+
"grad_norm": 1.6362569332122803,
|
7890 |
+
"learning_rate": 3.0826019516600893e-07,
|
7891 |
+
"loss": 0.3292,
|
7892 |
+
"step": 56300
|
7893 |
+
},
|
7894 |
+
{
|
7895 |
+
"epoch": 0.9957413723030164,
|
7896 |
+
"grad_norm": 1.707270622253418,
|
7897 |
+
"learning_rate": 2.591741768274725e-07,
|
7898 |
+
"loss": 0.3199,
|
7899 |
+
"step": 56350
|
7900 |
+
},
|
7901 |
+
{
|
7902 |
+
"epoch": 0.9966249050202329,
|
7903 |
+
"grad_norm": 2.1296205520629883,
|
7904 |
+
"learning_rate": 2.1008815848893604e-07,
|
7905 |
+
"loss": 0.284,
|
7906 |
+
"step": 56400
|
7907 |
}
|
7908 |
],
|
7909 |
"logging_steps": 50,
|