Training in progress, step 44600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1407,6 +1407,10 @@ You can finetune this model on your own dataset.
|
|
1407 |
| 0.7828 | 44300 | 0.2678 |
|
1408 |
| 0.7837 | 44350 | 0.3625 |
|
1409 |
| 0.7846 | 44400 | 0.311 |
|
|
|
|
|
|
|
|
|
1410 |
|
1411 |
</details>
|
1412 |
|
|
|
1407 |
| 0.7828 | 44300 | 0.2678 |
|
1408 |
| 0.7837 | 44350 | 0.3625 |
|
1409 |
| 0.7846 | 44400 | 0.311 |
|
1410 |
+
| 0.7855 | 44450 | 0.2924 |
|
1411 |
+
| 0.7863 | 44500 | 0.2602 |
|
1412 |
+
| 0.7872 | 44550 | 0.3936 |
|
1413 |
+
| 0.7881 | 44600 | 0.229 |
|
1414 |
|
1415 |
</details>
|
1416 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fce513425ee78d82ca94654cdc0a4320210e5ec028784be1406c877832c93919
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:382cb2a8e601a053bb0ea3969b939edccc79edf806fa2fb79934a1132c941d31
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bf11240eaa3ac15b3a592742695066b7826256894b145544defe57b1a51d2f5
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47e00c9999dcddbc2f016707d637f020dd9c8d184d501770291cf9d13b707305
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f34973fe7d7e41a1f8ad1f0f8cd4803fde065c5148acc0e939fcacd0a4b92d9
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -6224,6 +6224,34 @@
|
|
6224 |
"learning_rate": 1.1984842237537062e-05,
|
6225 |
"loss": 0.311,
|
6226 |
"step": 44400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6227 |
}
|
6228 |
],
|
6229 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.7881111837571345,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 44600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
6224 |
"learning_rate": 1.1984842237537062e-05,
|
6225 |
"loss": 0.311,
|
6226 |
"step": 44400
|
6227 |
+
},
|
6228 |
+
{
|
6229 |
+
"epoch": 0.7854605856054849,
|
6230 |
+
"grad_norm": 1.2201918363571167,
|
6231 |
+
"learning_rate": 1.1935756219198523e-05,
|
6232 |
+
"loss": 0.2924,
|
6233 |
+
"step": 44450
|
6234 |
+
},
|
6235 |
+
{
|
6236 |
+
"epoch": 0.7863441183227015,
|
6237 |
+
"grad_norm": 1.6248202323913574,
|
6238 |
+
"learning_rate": 1.1886670200859988e-05,
|
6239 |
+
"loss": 0.2602,
|
6240 |
+
"step": 44500
|
6241 |
+
},
|
6242 |
+
{
|
6243 |
+
"epoch": 0.787227651039918,
|
6244 |
+
"grad_norm": 2.032122850418091,
|
6245 |
+
"learning_rate": 1.183758418252145e-05,
|
6246 |
+
"loss": 0.3936,
|
6247 |
+
"step": 44550
|
6248 |
+
},
|
6249 |
+
{
|
6250 |
+
"epoch": 0.7881111837571345,
|
6251 |
+
"grad_norm": 1.149383783340454,
|
6252 |
+
"learning_rate": 1.1788498164182915e-05,
|
6253 |
+
"loss": 0.229,
|
6254 |
+
"step": 44600
|
6255 |
}
|
6256 |
],
|
6257 |
"logging_steps": 50,
|