Training in progress, step 5000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
-
# tensor([[1.0000, 0.
|
290 |
-
# [0.
|
291 |
-
# [0.
|
292 |
```
|
293 |
|
294 |
<!--
|
@@ -1320,6 +1320,10 @@ You can finetune this model on your own dataset.
|
|
1320 |
| 0.0831 | 4700 | 0.5158 |
|
1321 |
| 0.0839 | 4750 | 0.4598 |
|
1322 |
| 0.0848 | 4800 | 0.4662 |
|
|
|
|
|
|
|
|
|
1323 |
|
1324 |
|
1325 |
### Framework Versions
|
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
+
# tensor([[1.0000, 0.6188, 0.1217],
|
290 |
+
# [0.6188, 1.0000, 0.1507],
|
291 |
+
# [0.1217, 0.1507, 1.0000]])
|
292 |
```
|
293 |
|
294 |
<!--
|
|
|
1320 |
| 0.0831 | 4700 | 0.5158 |
|
1321 |
| 0.0839 | 4750 | 0.4598 |
|
1322 |
| 0.0848 | 4800 | 0.4662 |
|
1323 |
+
| 0.0857 | 4850 | 0.459 |
|
1324 |
+
| 0.0866 | 4900 | 0.4275 |
|
1325 |
+
| 0.0875 | 4950 | 0.5575 |
|
1326 |
+
| 0.0884 | 5000 | 0.4197 |
|
1327 |
|
1328 |
|
1329 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90e4d2bbf59fbcb40ff4b4462a436528ececec21cfbe639ba0aa1880e8a048b9
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609611
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:498b25c3a0cdf659b68bca58a1fe81b240dadbd1c0686b5aa30fdf8a0f4407de
|
3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbd110dd6b99c908a73a401be3d8438b48f355d5cb710bfb9c01fca2894f5f8e
|
3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1383
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82ad8990572ad11a824b7db276c8af49c179ca7e7724b4e6906cd0ae480a80a8
|
3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77d3a1390f85cf6329aeb072176eb6782a1d00623e0775f00f722a024157ca78
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -680,6 +680,34 @@
|
|
680 |
"learning_rate": 4.239399293286219e-05,
|
681 |
"loss": 0.4662,
|
682 |
"step": 4800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
683 |
}
|
684 |
],
|
685 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.08835327172165185,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 5000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
680 |
"learning_rate": 4.239399293286219e-05,
|
681 |
"loss": 0.4662,
|
682 |
"step": 4800
|
683 |
+
},
|
684 |
+
{
|
685 |
+
"epoch": 0.0857026735700023,
|
686 |
+
"grad_norm": 2.0289080142974854,
|
687 |
+
"learning_rate": 4.28356890459364e-05,
|
688 |
+
"loss": 0.459,
|
689 |
+
"step": 4850
|
690 |
+
},
|
691 |
+
{
|
692 |
+
"epoch": 0.08658620628721882,
|
693 |
+
"grad_norm": 2.6102516651153564,
|
694 |
+
"learning_rate": 4.32773851590106e-05,
|
695 |
+
"loss": 0.4275,
|
696 |
+
"step": 4900
|
697 |
+
},
|
698 |
+
{
|
699 |
+
"epoch": 0.08746973900443533,
|
700 |
+
"grad_norm": 2.5842251777648926,
|
701 |
+
"learning_rate": 4.3719081272084805e-05,
|
702 |
+
"loss": 0.5575,
|
703 |
+
"step": 4950
|
704 |
+
},
|
705 |
+
{
|
706 |
+
"epoch": 0.08835327172165185,
|
707 |
+
"grad_norm": 3.6427652835845947,
|
708 |
+
"learning_rate": 4.4160777385159016e-05,
|
709 |
+
"loss": 0.4197,
|
710 |
+
"step": 5000
|
711 |
}
|
712 |
],
|
713 |
"logging_steps": 50,
|