Training in progress, step 5200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
-
# tensor([[1.0000, 0.
|
290 |
-
# [0.
|
291 |
-
# [0.
|
292 |
```
|
293 |
|
294 |
<!--
|
@@ -1222,6 +1222,8 @@ You can finetune this model on your own dataset.
|
|
1222 |
</details>
|
1223 |
|
1224 |
### Training Logs
|
|
|
|
|
1225 |
| Epoch | Step | Training Loss |
|
1226 |
|:------:|:----:|:-------------:|
|
1227 |
| 0.0009 | 50 | 1.3738 |
|
@@ -1324,7 +1326,12 @@ You can finetune this model on your own dataset.
|
|
1324 |
| 0.0866 | 4900 | 0.4275 |
|
1325 |
| 0.0875 | 4950 | 0.5575 |
|
1326 |
| 0.0884 | 5000 | 0.4197 |
|
|
|
|
|
|
|
|
|
1327 |
|
|
|
1328 |
|
1329 |
### Framework Versions
|
1330 |
- Python: 3.12.11
|
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
+
# tensor([[1.0000, 0.6341, 0.1285],
|
290 |
+
# [0.6341, 1.0000, 0.1635],
|
291 |
+
# [0.1285, 0.1635, 1.0000]])
|
292 |
```
|
293 |
|
294 |
<!--
|
|
|
1222 |
</details>
|
1223 |
|
1224 |
### Training Logs
|
1225 |
+
<details><summary>Click to expand</summary>
|
1226 |
+
|
1227 |
| Epoch | Step | Training Loss |
|
1228 |
|:------:|:----:|:-------------:|
|
1229 |
| 0.0009 | 50 | 1.3738 |
|
|
|
1326 |
| 0.0866 | 4900 | 0.4275 |
|
1327 |
| 0.0875 | 4950 | 0.5575 |
|
1328 |
| 0.0884 | 5000 | 0.4197 |
|
1329 |
+
| 0.0892 | 5050 | 0.4525 |
|
1330 |
+
| 0.0901 | 5100 | 0.4469 |
|
1331 |
+
| 0.0910 | 5150 | 0.5283 |
|
1332 |
+
| 0.0919 | 5200 | 0.4826 |
|
1333 |
|
1334 |
+
</details>
|
1335 |
|
1336 |
### Framework Versions
|
1337 |
- Python: 3.12.11
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4af90c7f23b49e51d466a6d8a0d86939bf37e4d8ea5c0b655ab35b7c151a05cf
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609611
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:544baf7d60ab6e318f100fc6c2fccfb140e19ae226f8f7d820d236577b1da105
|
3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11c0cb9bd2c9c6c5c964ae0bb4e4e4872958c4dc97ac96f40f1c118dec4c9803
|
3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1383
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afb655591367e732af512c0b489f6652e710a205ef4e8286da8729a948980ee0
|
3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:407fdddb74e20b064ec7452aeeae963d95b777da5e4cde56c489f916bcf701b2
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -708,6 +708,34 @@
|
|
708 |
"learning_rate": 4.4160777385159016e-05,
|
709 |
"loss": 0.4197,
|
710 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
711 |
}
|
712 |
],
|
713 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.09188740259051793,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 5200,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
708 |
"learning_rate": 4.4160777385159016e-05,
|
709 |
"loss": 0.4197,
|
710 |
"step": 5000
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 0.08923680443886838,
|
714 |
+
"grad_norm": 1.8962676525115967,
|
715 |
+
"learning_rate": 4.4602473498233214e-05,
|
716 |
+
"loss": 0.4525,
|
717 |
+
"step": 5050
|
718 |
+
},
|
719 |
+
{
|
720 |
+
"epoch": 0.09012033715608489,
|
721 |
+
"grad_norm": 2.1373822689056396,
|
722 |
+
"learning_rate": 4.5044169611307425e-05,
|
723 |
+
"loss": 0.4469,
|
724 |
+
"step": 5100
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"epoch": 0.0910038698733014,
|
728 |
+
"grad_norm": 5.542126178741455,
|
729 |
+
"learning_rate": 4.548586572438163e-05,
|
730 |
+
"loss": 0.5283,
|
731 |
+
"step": 5150
|
732 |
+
},
|
733 |
+
{
|
734 |
+
"epoch": 0.09188740259051793,
|
735 |
+
"grad_norm": 2.4414310455322266,
|
736 |
+
"learning_rate": 4.5927561837455834e-05,
|
737 |
+
"loss": 0.4826,
|
738 |
+
"step": 5200
|
739 |
}
|
740 |
],
|
741 |
"logging_steps": 50,
|