Training in progress, step 53800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1229,6 +1229,10 @@ You can finetune this model on your own dataset.
|
|
1229 |
| 0.9454 | 53500 | 0.3135 |
|
1230 |
| 0.9463 | 53550 | 0.2813 |
|
1231 |
| 0.9471 | 53600 | 0.2333 |
|
|
|
|
|
|
|
|
|
1232 |
|
1233 |
|
1234 |
### Framework Versions
|
|
|
1229 |
| 0.9454 | 53500 | 0.3135 |
|
1230 |
| 0.9463 | 53550 | 0.2813 |
|
1231 |
| 0.9471 | 53600 | 0.2333 |
|
1232 |
+
| 0.9480 | 53650 | 0.3022 |
|
1233 |
+
| 0.9489 | 53700 | 0.303 |
|
1234 |
+
| 0.9498 | 53750 | 0.2651 |
|
1235 |
+
| 0.9507 | 53800 | 0.3384 |
|
1236 |
|
1237 |
|
1238 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32883cc8380b5551ef8b0cc0e17f87142260e9e4e1f436103ef31fd1a32a59e7
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdb8e455fcefebf6e65cc70da46171fa65d7d3724795dbd5ebe39eb526ed73b1
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa1d10cc07f26722031d44ae9ed204b0fd93f2944405a821aaf7d32c94a690ee
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcb8b2220528541cf3e98738aad3670d259e0a7ccf7ba003e8c4a04ead86cd5e
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e02c7889d99a6f23f73ba0719fb9e5f5f23bfc59153bd58184aaf763c67dfbef
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7512,6 +7512,34 @@
|
|
7512 |
"learning_rate": 2.9579234650802064e-06,
|
7513 |
"loss": 0.2333,
|
7514 |
"step": 53600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7515 |
}
|
7516 |
],
|
7517 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9506812037249739,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 53800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7512 |
"learning_rate": 2.9579234650802064e-06,
|
7513 |
"loss": 0.2333,
|
7514 |
"step": 53600
|
7515 |
+
},
|
7516 |
+
{
|
7517 |
+
"epoch": 0.9480306055733244,
|
7518 |
+
"grad_norm": 3.0708839893341064,
|
7519 |
+
"learning_rate": 2.9088374467416706e-06,
|
7520 |
+
"loss": 0.3022,
|
7521 |
+
"step": 53650
|
7522 |
+
},
|
7523 |
+
{
|
7524 |
+
"epoch": 0.9489141382905409,
|
7525 |
+
"grad_norm": 1.637635588645935,
|
7526 |
+
"learning_rate": 2.859751428403134e-06,
|
7527 |
+
"loss": 0.303,
|
7528 |
+
"step": 53700
|
7529 |
+
},
|
7530 |
+
{
|
7531 |
+
"epoch": 0.9497976710077575,
|
7532 |
+
"grad_norm": 2.5479607582092285,
|
7533 |
+
"learning_rate": 2.8106654100645974e-06,
|
7534 |
+
"loss": 0.2651,
|
7535 |
+
"step": 53750
|
7536 |
+
},
|
7537 |
+
{
|
7538 |
+
"epoch": 0.9506812037249739,
|
7539 |
+
"grad_norm": 4.394486427307129,
|
7540 |
+
"learning_rate": 2.761579391726061e-06,
|
7541 |
+
"loss": 0.3384,
|
7542 |
+
"step": 53800
|
7543 |
}
|
7544 |
],
|
7545 |
"logging_steps": 50,
|