Training in progress, step 50800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1307,6 +1307,10 @@ You can finetune this model on your own dataset.
|
|
1307 |
| 0.8924 | 50500 | 0.3035 |
|
1308 |
| 0.8933 | 50550 | 0.2654 |
|
1309 |
| 0.8941 | 50600 | 0.2742 |
|
|
|
|
|
|
|
|
|
1310 |
|
1311 |
</details>
|
1312 |
|
|
|
1307 |
| 0.8924 | 50500 | 0.3035 |
|
1308 |
| 0.8933 | 50550 | 0.2654 |
|
1309 |
| 0.8941 | 50600 | 0.2742 |
|
1310 |
+
| 0.8950 | 50650 | 0.3435 |
|
1311 |
+
| 0.8959 | 50700 | 0.2947 |
|
1312 |
+
| 0.8968 | 50750 | 0.3013 |
|
1313 |
+
| 0.8977 | 50800 | 0.3845 |
|
1314 |
|
1315 |
</details>
|
1316 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:735447a9bf44cea3982a63ea1f966e08b7318fb0e661723166d288f02b87e519
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2850e2599deef45e89d0f9a762b74aca5edd8c76810f98fd68acb5fea8d7226
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65c5b25f41326cbbfda1e3a08018f1c0a2d702a29def2a1a126e092c108f1e2b
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2eecf2b6d210d44959e71df0344547cefe9b13a61cd7bf96084fa2fabddd2a6b
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3068e69bbcee94c299b9e89630d72a89daebc924a613171ca109523fc3200153
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7092,6 +7092,34 @@
|
|
7092 |
"learning_rate": 5.901121124658853e-06,
|
7093 |
"loss": 0.2742,
|
7094 |
"step": 50600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7095 |
}
|
7096 |
],
|
7097 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.8976692406919828,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 50800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7092 |
"learning_rate": 5.901121124658853e-06,
|
7093 |
"loss": 0.2742,
|
7094 |
"step": 50600
|
7095 |
+
},
|
7096 |
+
{
|
7097 |
+
"epoch": 0.8950186425403333,
|
7098 |
+
"grad_norm": 1.4973150491714478,
|
7099 |
+
"learning_rate": 5.852035106320316e-06,
|
7100 |
+
"loss": 0.3435,
|
7101 |
+
"step": 50650
|
7102 |
+
},
|
7103 |
+
{
|
7104 |
+
"epoch": 0.8959021752575498,
|
7105 |
+
"grad_norm": 1.2695672512054443,
|
7106 |
+
"learning_rate": 5.80294908798178e-06,
|
7107 |
+
"loss": 0.2947,
|
7108 |
+
"step": 50700
|
7109 |
+
},
|
7110 |
+
{
|
7111 |
+
"epoch": 0.8967857079747663,
|
7112 |
+
"grad_norm": 0.9303974509239197,
|
7113 |
+
"learning_rate": 5.753863069643243e-06,
|
7114 |
+
"loss": 0.3013,
|
7115 |
+
"step": 50750
|
7116 |
+
},
|
7117 |
+
{
|
7118 |
+
"epoch": 0.8976692406919828,
|
7119 |
+
"grad_norm": 1.5696642398834229,
|
7120 |
+
"learning_rate": 5.704777051304706e-06,
|
7121 |
+
"loss": 0.3845,
|
7122 |
+
"step": 50800
|
7123 |
}
|
7124 |
],
|
7125 |
"logging_steps": 50,
|