Training in progress, step 9800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1217,6 +1217,10 @@ You can finetune this model on your own dataset.
|
|
1217 |
| 0.1679 | 9500 | 0.3932 |
|
1218 |
| 0.1688 | 9550 | 0.4836 |
|
1219 |
| 0.1696 | 9600 | 0.3989 |
|
|
|
|
|
|
|
|
|
1220 |
|
1221 |
|
1222 |
### Framework Versions
|
|
|
1217 |
| 0.1679 | 9500 | 0.3932 |
|
1218 |
| 0.1688 | 9550 | 0.4836 |
|
1219 |
| 0.1696 | 9600 | 0.3989 |
|
1220 |
+
| 0.1705 | 9650 | 0.4025 |
|
1221 |
+
| 0.1714 | 9700 | 0.467 |
|
1222 |
+
| 0.1723 | 9750 | 0.3558 |
|
1223 |
+
| 0.1732 | 9800 | 0.3623 |
|
1224 |
|
1225 |
|
1226 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb531b99dca162667fb726e612e94c051018f779f5ff509bebfc939722f85886
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4671b0ce660e002e126649d2065bbd8f4633842c0c71f60f512ccc9dba9aba21
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95f0e0a231d2de9fe39fce57e75a586db12e27f102a687e2901f534fc55060b6
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99e39c7d3d290833603a74523e3d4fc84b4f0f3056845fa8a3e71170347650dd
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a105c4972c9f577b54a9a86ea803b0dfd0803e1dbb9c812134e5869ca00a97d7
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -1352,6 +1352,34 @@
|
|
1352 |
"learning_rate": 4.61330034752901e-05,
|
1353 |
"loss": 0.3989,
|
1354 |
"step": 9600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1355 |
}
|
1356 |
],
|
1357 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.17317241257443763,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 9800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
1352 |
"learning_rate": 4.61330034752901e-05,
|
1353 |
"loss": 0.3989,
|
1354 |
"step": 9600
|
1355 |
+
},
|
1356 |
+
{
|
1357 |
+
"epoch": 0.17052181442278808,
|
1358 |
+
"grad_norm": 1.5831292867660522,
|
1359 |
+
"learning_rate": 4.608391745695156e-05,
|
1360 |
+
"loss": 0.4025,
|
1361 |
+
"step": 9650
|
1362 |
+
},
|
1363 |
+
{
|
1364 |
+
"epoch": 0.1714053471400046,
|
1365 |
+
"grad_norm": 5.1861371994018555,
|
1366 |
+
"learning_rate": 4.6034831438613027e-05,
|
1367 |
+
"loss": 0.467,
|
1368 |
+
"step": 9700
|
1369 |
+
},
|
1370 |
+
{
|
1371 |
+
"epoch": 0.1722888798572211,
|
1372 |
+
"grad_norm": 3.7466721534729004,
|
1373 |
+
"learning_rate": 4.598574542027449e-05,
|
1374 |
+
"loss": 0.3558,
|
1375 |
+
"step": 9750
|
1376 |
+
},
|
1377 |
+
{
|
1378 |
+
"epoch": 0.17317241257443763,
|
1379 |
+
"grad_norm": 2.143721342086792,
|
1380 |
+
"learning_rate": 4.5936659401935956e-05,
|
1381 |
+
"loss": 0.3623,
|
1382 |
+
"step": 9800
|
1383 |
}
|
1384 |
],
|
1385 |
"logging_steps": 50,
|