Training in progress, step 44400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1403,6 +1403,10 @@ You can finetune this model on your own dataset.
|
|
1403 |
| 0.7793 | 44100 | 0.2785 |
|
1404 |
| 0.7802 | 44150 | 0.2386 |
|
1405 |
| 0.7810 | 44200 | 0.3297 |
|
|
|
|
|
|
|
|
|
1406 |
|
1407 |
</details>
|
1408 |
|
|
|
1403 |
| 0.7793 | 44100 | 0.2785 |
|
1404 |
| 0.7802 | 44150 | 0.2386 |
|
1405 |
| 0.7810 | 44200 | 0.3297 |
|
1406 |
+
| 0.7819 | 44250 | 0.2576 |
|
1407 |
+
| 0.7828 | 44300 | 0.2678 |
|
1408 |
+
| 0.7837 | 44350 | 0.3625 |
|
1409 |
+
| 0.7846 | 44400 | 0.311 |
|
1410 |
|
1411 |
</details>
|
1412 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0590fb55aab08f3535397ce8dcef99c8680506c32302ba2d6e910a8249da6224
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abff80a73aec9612ce24bcd8cc146093b60dfe7bfd2e43a1f500d834e46d5802
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eb5618265930c1ad5f9342c3f992eaed8023f41d629ef3773582c7b22437a29
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9756468f8fd1e049c0f34546e465c8cf6b5e4ecebc81c7ebf057569a23c46ac
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdfed137404ca82aee02dc12b81b45bc0fb558d702987085b73e2f3d6d8e8b76
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -6196,6 +6196,34 @@
|
|
6196 |
"learning_rate": 1.2181186310891207e-05,
|
6197 |
"loss": 0.3297,
|
6198 |
"step": 44200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6199 |
}
|
6200 |
],
|
6201 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.7845770528882684,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 44400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
6196 |
"learning_rate": 1.2181186310891207e-05,
|
6197 |
"loss": 0.3297,
|
6198 |
"step": 44200
|
6199 |
+
},
|
6200 |
+
{
|
6201 |
+
"epoch": 0.7819264547366189,
|
6202 |
+
"grad_norm": 1.0489422082901,
|
6203 |
+
"learning_rate": 1.213210029255267e-05,
|
6204 |
+
"loss": 0.2576,
|
6205 |
+
"step": 44250
|
6206 |
+
},
|
6207 |
+
{
|
6208 |
+
"epoch": 0.7828099874538355,
|
6209 |
+
"grad_norm": 2.2906908988952637,
|
6210 |
+
"learning_rate": 1.2083014274214133e-05,
|
6211 |
+
"loss": 0.2678,
|
6212 |
+
"step": 44300
|
6213 |
+
},
|
6214 |
+
{
|
6215 |
+
"epoch": 0.783693520171052,
|
6216 |
+
"grad_norm": 1.5887507200241089,
|
6217 |
+
"learning_rate": 1.2033928255875597e-05,
|
6218 |
+
"loss": 0.3625,
|
6219 |
+
"step": 44350
|
6220 |
+
},
|
6221 |
+
{
|
6222 |
+
"epoch": 0.7845770528882684,
|
6223 |
+
"grad_norm": 1.592004418373108,
|
6224 |
+
"learning_rate": 1.1984842237537062e-05,
|
6225 |
+
"loss": 0.311,
|
6226 |
+
"step": 44400
|
6227 |
}
|
6228 |
],
|
6229 |
"logging_steps": 50,
|