Training in progress, step 8200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
-
# tensor([[1.0000, 0.
|
290 |
-
# [0.
|
291 |
-
# [0.
|
292 |
```
|
293 |
|
294 |
<!--
|
@@ -1276,6 +1276,10 @@ You can finetune this model on your own dataset.
|
|
1276 |
| 0.1396 | 7900 | 0.4474 |
|
1277 |
| 0.1405 | 7950 | 0.3524 |
|
1278 |
| 0.1414 | 8000 | 0.4957 |
|
|
|
|
|
|
|
|
|
1279 |
|
1280 |
|
1281 |
### Framework Versions
|
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
+
# tensor([[1.0000, 0.7055, 0.1480],
|
290 |
+
# [0.7055, 1.0000, 0.1624],
|
291 |
+
# [0.1480, 0.1624, 1.0000]])
|
292 |
```
|
293 |
|
294 |
<!--
|
|
|
1276 |
| 0.1396 | 7900 | 0.4474 |
|
1277 |
| 0.1405 | 7950 | 0.3524 |
|
1278 |
| 0.1414 | 8000 | 0.4957 |
|
1279 |
+
| 0.1422 | 8050 | 0.4461 |
|
1280 |
+
| 0.1431 | 8100 | 0.4983 |
|
1281 |
+
| 0.1440 | 8150 | 0.4535 |
|
1282 |
+
| 0.1449 | 8200 | 0.3668 |
|
1283 |
|
1284 |
|
1285 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4640bbfebbb11ecea082ff1af6d855d052c0c8e1bdf9168d4f7359599f62aabd
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609611
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccd29683807f5e0218e2f5d58ca633b3fdf6424d5b06a7fd7dd1632307029b7e
|
3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eae3e1579fe71c3a791c337c98621066f7781ea53baa1bbd326d61e1273a7011
|
3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1383
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b7589e0e51e5ea2c39973809a8f993519f86ffc0bc8e5dc5baeb72e06aa3bb
|
3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71575ac6af3608ee2ab21516b68821ed7b385bc77900a19bfec38f30c0387852
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -1128,6 +1128,34 @@
|
|
1128 |
"learning_rate": 4.770375606212327e-05,
|
1129 |
"loss": 0.4957,
|
1130 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1131 |
}
|
1132 |
],
|
1133 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.14489936562350905,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 8200,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
1128 |
"learning_rate": 4.770375606212327e-05,
|
1129 |
"loss": 0.4957,
|
1130 |
"step": 8000
|
1131 |
+
},
|
1132 |
+
{
|
1133 |
+
"epoch": 0.1422487674718595,
|
1134 |
+
"grad_norm": 1.813818335533142,
|
1135 |
+
"learning_rate": 4.765467004378473e-05,
|
1136 |
+
"loss": 0.4461,
|
1137 |
+
"step": 8050
|
1138 |
+
},
|
1139 |
+
{
|
1140 |
+
"epoch": 0.143132300189076,
|
1141 |
+
"grad_norm": 1.936123013496399,
|
1142 |
+
"learning_rate": 4.760558402544619e-05,
|
1143 |
+
"loss": 0.4983,
|
1144 |
+
"step": 8100
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"epoch": 0.14401583290629252,
|
1148 |
+
"grad_norm": 2.0068929195404053,
|
1149 |
+
"learning_rate": 4.7556498007107656e-05,
|
1150 |
+
"loss": 0.4535,
|
1151 |
+
"step": 8150
|
1152 |
+
},
|
1153 |
+
{
|
1154 |
+
"epoch": 0.14489936562350905,
|
1155 |
+
"grad_norm": 1.6743545532226562,
|
1156 |
+
"learning_rate": 4.750741198876913e-05,
|
1157 |
+
"loss": 0.3668,
|
1158 |
+
"step": 8200
|
1159 |
}
|
1160 |
],
|
1161 |
"logging_steps": 50,
|