Training in progress, step 52400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1201,6 +1201,10 @@ You can finetune this model on your own dataset.
|
|
1201 |
| 0.9206 | 52100 | 0.2286 |
|
1202 |
| 0.9215 | 52150 | 0.371 |
|
1203 |
| 0.9224 | 52200 | 0.2825 |
|
|
|
|
|
|
|
|
|
1204 |
|
1205 |
|
1206 |
### Framework Versions
|
|
|
1201 |
| 0.9206 | 52100 | 0.2286 |
|
1202 |
| 0.9215 | 52150 | 0.371 |
|
1203 |
| 0.9224 | 52200 | 0.2825 |
|
1204 |
+
| 0.9233 | 52250 | 0.3728 |
|
1205 |
+
| 0.9242 | 52300 | 0.3196 |
|
1206 |
+
| 0.9251 | 52350 | 0.3322 |
|
1207 |
+
| 0.9259 | 52400 | 0.3258 |
|
1208 |
|
1209 |
|
1210 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b6ccee8235180e0fcb744d9c9527b2c600512f7ecc4b3d25488b384822838a3
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:002e9ce3d93da778bb4fe1c95037789e052796647a41c67b89002e6d3a8a69ff
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3763a2915178d87a90c93bc33c9a17dd8d33e1ee286d4f530459bdee03d15b6d
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0315f631d073b9e84cefc9abe7ee2e75af036769039c7a2bf20f3d81a5e6f752
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acf725a620db2293e4b5f5ac9e35cb4f2f88cf44a5a0884c22a06b906efee164
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7316,6 +7316,34 @@
|
|
7316 |
"learning_rate": 4.331350258192457e-06,
|
7317 |
"loss": 0.2825,
|
7318 |
"step": 52200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7319 |
}
|
7320 |
],
|
7321 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9259422876429114,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 52400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7316 |
"learning_rate": 4.331350258192457e-06,
|
7317 |
"loss": 0.2825,
|
7318 |
"step": 52200
|
7319 |
+
},
|
7320 |
+
{
|
7321 |
+
"epoch": 0.9232916894912618,
|
7322 |
+
"grad_norm": 1.2270597219467163,
|
7323 |
+
"learning_rate": 4.28226423985392e-06,
|
7324 |
+
"loss": 0.3728,
|
7325 |
+
"step": 52250
|
7326 |
+
},
|
7327 |
+
{
|
7328 |
+
"epoch": 0.9241752222084784,
|
7329 |
+
"grad_norm": 1.8672150373458862,
|
7330 |
+
"learning_rate": 4.233178221515384e-06,
|
7331 |
+
"loss": 0.3196,
|
7332 |
+
"step": 52300
|
7333 |
+
},
|
7334 |
+
{
|
7335 |
+
"epoch": 0.9250587549256949,
|
7336 |
+
"grad_norm": 1.6005786657333374,
|
7337 |
+
"learning_rate": 4.184092203176848e-06,
|
7338 |
+
"loss": 0.3322,
|
7339 |
+
"step": 52350
|
7340 |
+
},
|
7341 |
+
{
|
7342 |
+
"epoch": 0.9259422876429114,
|
7343 |
+
"grad_norm": 1.4158750772476196,
|
7344 |
+
"learning_rate": 4.135006184838311e-06,
|
7345 |
+
"loss": 0.3258,
|
7346 |
+
"step": 52400
|
7347 |
}
|
7348 |
],
|
7349 |
"logging_steps": 50,
|