Training in progress, step 50600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1303,6 +1303,10 @@ You can finetune this model on your own dataset.
|
|
1303 |
| 0.8888 | 50300 | 0.3597 |
|
1304 |
| 0.8897 | 50350 | 0.3246 |
|
1305 |
| 0.8906 | 50400 | 0.2658 |
|
|
|
|
|
|
|
|
|
1306 |
|
1307 |
</details>
|
1308 |
|
|
|
1303 |
| 0.8888 | 50300 | 0.3597 |
|
1304 |
| 0.8897 | 50350 | 0.3246 |
|
1305 |
| 0.8906 | 50400 | 0.2658 |
|
1306 |
+
| 0.8915 | 50450 | 0.2954 |
|
1307 |
+
| 0.8924 | 50500 | 0.3035 |
|
1308 |
+
| 0.8933 | 50550 | 0.2654 |
|
1309 |
+
| 0.8941 | 50600 | 0.2742 |
|
1310 |
|
1311 |
</details>
|
1312 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7c4343fcf9b81ee5a6a389eb06b051ae53e0aa443077eb13f9892dc66deceec
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c5af64aade248d58122340358b0197dce309cca03ffce8ae549b8f07d208db3
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd5bc13ec5ce0dc9f4391185df8ee21467fa45863b467072b951e2bda0426e6a
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbdc3fca57fcb200a77e9e8cef345de9804f9877d6a37468a61a5b2e8c3dd1d5
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f9f1c43d41c176471993abe4ddcdbee9a97b13ab57ea41f5ea87ce032c51df5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7064,6 +7064,34 @@
|
|
7064 |
"learning_rate": 6.097465198012998e-06,
|
7065 |
"loss": 0.2658,
|
7066 |
"step": 50400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7067 |
}
|
7068 |
],
|
7069 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.8941351098231167,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 50600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7064 |
"learning_rate": 6.097465198012998e-06,
|
7065 |
"loss": 0.2658,
|
7066 |
"step": 50400
|
7067 |
+
},
|
7068 |
+
{
|
7069 |
+
"epoch": 0.8914845116714671,
|
7070 |
+
"grad_norm": 1.7012232542037964,
|
7071 |
+
"learning_rate": 6.048379179674462e-06,
|
7072 |
+
"loss": 0.2954,
|
7073 |
+
"step": 50450
|
7074 |
+
},
|
7075 |
+
{
|
7076 |
+
"epoch": 0.8923680443886837,
|
7077 |
+
"grad_norm": 1.2959450483322144,
|
7078 |
+
"learning_rate": 5.999293161335925e-06,
|
7079 |
+
"loss": 0.3035,
|
7080 |
+
"step": 50500
|
7081 |
+
},
|
7082 |
+
{
|
7083 |
+
"epoch": 0.8932515771059002,
|
7084 |
+
"grad_norm": 1.6592167615890503,
|
7085 |
+
"learning_rate": 5.950207142997389e-06,
|
7086 |
+
"loss": 0.2654,
|
7087 |
+
"step": 50550
|
7088 |
+
},
|
7089 |
+
{
|
7090 |
+
"epoch": 0.8941351098231167,
|
7091 |
+
"grad_norm": 1.2229481935501099,
|
7092 |
+
"learning_rate": 5.901121124658853e-06,
|
7093 |
+
"loss": 0.2742,
|
7094 |
+
"step": 50600
|
7095 |
}
|
7096 |
],
|
7097 |
"logging_steps": 50,
|