Training in progress, step 54600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1245,6 +1245,10 @@ You can finetune this model on your own dataset.
|
|
1245 |
| 0.9595 | 54300 | 0.2834 |
|
1246 |
| 0.9604 | 54350 | 0.3271 |
|
1247 |
| 0.9613 | 54400 | 0.3048 |
|
|
|
|
|
|
|
|
|
1248 |
|
1249 |
|
1250 |
### Framework Versions
|
|
|
1245 |
| 0.9595 | 54300 | 0.2834 |
|
1246 |
| 0.9604 | 54350 | 0.3271 |
|
1247 |
| 0.9613 | 54400 | 0.3048 |
|
1248 |
+
| 0.9622 | 54450 | 0.2813 |
|
1249 |
+
| 0.9631 | 54500 | 0.3229 |
|
1250 |
+
| 0.9639 | 54550 | 0.3064 |
|
1251 |
+
| 0.9648 | 54600 | 0.2925 |
|
1252 |
|
1253 |
|
1254 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6908b7749db5d0a826221cc7ecf07127dce59b7cc3e1282b238fab4edc8c06dc
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f62d74de1aa89c4bdb621c1ad70670a4bf5390f8a562eceb290549e4b451ee2
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06c5edde7f9f695c0f4e2228698ef25b87e27f53ab10a510ae3e7d0707680eae
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:722f444d07f10344822682b1f6fd20e81bcbd5d550a56fbe82dc92a7f96b7a4c
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2266beea22f5d851f4cb4c5de2987869a11c52788a165a553f5636a0c72bb073
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7624,6 +7624,34 @@
|
|
7624 |
"learning_rate": 2.1725471716636233e-06,
|
7625 |
"loss": 0.3048,
|
7626 |
"step": 54400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7627 |
}
|
7628 |
],
|
7629 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9648177272004382,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 54600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7624 |
"learning_rate": 2.1725471716636233e-06,
|
7625 |
"loss": 0.3048,
|
7626 |
"step": 54400
|
7627 |
+
},
|
7628 |
+
{
|
7629 |
+
"epoch": 0.9621671290487886,
|
7630 |
+
"grad_norm": 1.5154780149459839,
|
7631 |
+
"learning_rate": 2.1234611533250867e-06,
|
7632 |
+
"loss": 0.2813,
|
7633 |
+
"step": 54450
|
7634 |
+
},
|
7635 |
+
{
|
7636 |
+
"epoch": 0.9630506617660052,
|
7637 |
+
"grad_norm": 1.3216954469680786,
|
7638 |
+
"learning_rate": 2.0743751349865505e-06,
|
7639 |
+
"loss": 0.3229,
|
7640 |
+
"step": 54500
|
7641 |
+
},
|
7642 |
+
{
|
7643 |
+
"epoch": 0.9639341944832217,
|
7644 |
+
"grad_norm": 1.5333393812179565,
|
7645 |
+
"learning_rate": 2.0252891166480143e-06,
|
7646 |
+
"loss": 0.3064,
|
7647 |
+
"step": 54550
|
7648 |
+
},
|
7649 |
+
{
|
7650 |
+
"epoch": 0.9648177272004382,
|
7651 |
+
"grad_norm": 1.3715639114379883,
|
7652 |
+
"learning_rate": 1.9762030983094777e-06,
|
7653 |
+
"loss": 0.2925,
|
7654 |
+
"step": 54600
|
7655 |
}
|
7656 |
],
|
7657 |
"logging_steps": 50,
|