Training in progress, step 54800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1249,6 +1249,10 @@ You can finetune this model on your own dataset.
|
|
1249 |
| 0.9631 | 54500 | 0.3229 |
|
1250 |
| 0.9639 | 54550 | 0.3064 |
|
1251 |
| 0.9648 | 54600 | 0.2925 |
|
|
|
|
|
|
|
|
|
1252 |
|
1253 |
|
1254 |
### Framework Versions
|
|
|
1249 |
| 0.9631 | 54500 | 0.3229 |
|
1250 |
| 0.9639 | 54550 | 0.3064 |
|
1251 |
| 0.9648 | 54600 | 0.2925 |
|
1252 |
+
| 0.9657 | 54650 | 0.301 |
|
1253 |
+
| 0.9666 | 54700 | 0.2484 |
|
1254 |
+
| 0.9675 | 54750 | 0.3228 |
|
1255 |
+
| 0.9684 | 54800 | 0.2935 |
|
1256 |
|
1257 |
|
1258 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4660d5cc04b8230e366c2507b6edc6eb9740e5655f078f06d7fa8b708868348d
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7ddc79258480305b86c7de987bf66efad05525fb132f3c4537096562844858a
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe80331ddc64a10f3922d2d2aa6e1345a324e582ebfcde4acd33dd3c01a97f23
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a394d9ef4969559c2e748a44250c8baede91e41491cb79bd3a87fcd799d229f
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:175e278f8a42116e02058ec0c5ec6fdf92f24c652abe7967b071b8b6350190f1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7652,6 +7652,34 @@
|
|
7652 |
"learning_rate": 1.9762030983094777e-06,
|
7653 |
"loss": 0.2925,
|
7654 |
"step": 54600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7655 |
}
|
7656 |
],
|
7657 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9683518580693043,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 54800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7652 |
"learning_rate": 1.9762030983094777e-06,
|
7653 |
"loss": 0.2925,
|
7654 |
"step": 54600
|
7655 |
+
},
|
7656 |
+
{
|
7657 |
+
"epoch": 0.9657012599176548,
|
7658 |
+
"grad_norm": 3.4723856449127197,
|
7659 |
+
"learning_rate": 1.927117079970941e-06,
|
7660 |
+
"loss": 0.301,
|
7661 |
+
"step": 54650
|
7662 |
+
},
|
7663 |
+
{
|
7664 |
+
"epoch": 0.9665847926348713,
|
7665 |
+
"grad_norm": 3.3657915592193604,
|
7666 |
+
"learning_rate": 1.8780310616324047e-06,
|
7667 |
+
"loss": 0.2484,
|
7668 |
+
"step": 54700
|
7669 |
+
},
|
7670 |
+
{
|
7671 |
+
"epoch": 0.9674683253520878,
|
7672 |
+
"grad_norm": 3.2125537395477295,
|
7673 |
+
"learning_rate": 1.8289450432938681e-06,
|
7674 |
+
"loss": 0.3228,
|
7675 |
+
"step": 54750
|
7676 |
+
},
|
7677 |
+
{
|
7678 |
+
"epoch": 0.9683518580693043,
|
7679 |
+
"grad_norm": 3.5145859718322754,
|
7680 |
+
"learning_rate": 1.779859024955332e-06,
|
7681 |
+
"loss": 0.2935,
|
7682 |
+
"step": 54800
|
7683 |
}
|
7684 |
],
|
7685 |
"logging_steps": 50,
|