Training in progress, step 5600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
-
# tensor([[1.0000, 0.
|
290 |
-
# [0.
|
291 |
-
# [0.
|
292 |
```
|
293 |
|
294 |
<!--
|
@@ -1222,120 +1222,13 @@ You can finetune this model on your own dataset.
|
|
1222 |
</details>
|
1223 |
|
1224 |
### Training Logs
|
1225 |
-
<details><summary>Click to expand</summary>
|
1226 |
-
|
1227 |
| Epoch | Step | Training Loss |
|
1228 |
|:------:|:----:|:-------------:|
|
1229 |
-
| 0.
|
1230 |
-
| 0.
|
1231 |
-
| 0.
|
1232 |
-
| 0.
|
1233 |
-
| 0.0044 | 250 | 0.9431 |
|
1234 |
-
| 0.0053 | 300 | 0.8566 |
|
1235 |
-
| 0.0062 | 350 | 0.8697 |
|
1236 |
-
| 0.0071 | 400 | 0.8099 |
|
1237 |
-
| 0.0080 | 450 | 0.766 |
|
1238 |
-
| 0.0088 | 500 | 0.7402 |
|
1239 |
-
| 0.0097 | 550 | 0.8769 |
|
1240 |
-
| 0.0106 | 600 | 0.6827 |
|
1241 |
-
| 0.0115 | 650 | 0.7252 |
|
1242 |
-
| 0.0124 | 700 | 0.7701 |
|
1243 |
-
| 0.0133 | 750 | 0.7525 |
|
1244 |
-
| 0.0141 | 800 | 0.7097 |
|
1245 |
-
| 0.0150 | 850 | 0.7575 |
|
1246 |
-
| 0.0159 | 900 | 0.6899 |
|
1247 |
-
| 0.0168 | 950 | 0.6655 |
|
1248 |
-
| 0.0177 | 1000 | 0.6601 |
|
1249 |
-
| 0.0186 | 1050 | 0.6925 |
|
1250 |
-
| 0.0194 | 1100 | 0.6169 |
|
1251 |
-
| 0.0203 | 1150 | 0.6367 |
|
1252 |
-
| 0.0212 | 1200 | 0.615 |
|
1253 |
-
| 0.0221 | 1250 | 0.6869 |
|
1254 |
-
| 0.0230 | 1300 | 0.7527 |
|
1255 |
-
| 0.0239 | 1350 | 0.6124 |
|
1256 |
-
| 0.0247 | 1400 | 0.6511 |
|
1257 |
-
| 0.0256 | 1450 | 0.7047 |
|
1258 |
-
| 0.0265 | 1500 | 0.6639 |
|
1259 |
-
| 0.0274 | 1550 | 0.6795 |
|
1260 |
-
| 0.0283 | 1600 | 0.6341 |
|
1261 |
-
| 0.0292 | 1650 | 0.6031 |
|
1262 |
-
| 0.0300 | 1700 | 0.5959 |
|
1263 |
-
| 0.0309 | 1750 | 0.6412 |
|
1264 |
-
| 0.0318 | 1800 | 0.5065 |
|
1265 |
-
| 0.0327 | 1850 | 0.5611 |
|
1266 |
-
| 0.0336 | 1900 | 0.5298 |
|
1267 |
-
| 0.0345 | 1950 | 0.5759 |
|
1268 |
-
| 0.0353 | 2000 | 0.5782 |
|
1269 |
-
| 0.0362 | 2050 | 0.5221 |
|
1270 |
-
| 0.0371 | 2100 | 0.5966 |
|
1271 |
-
| 0.0380 | 2150 | 0.5989 |
|
1272 |
-
| 0.0389 | 2200 | 0.5097 |
|
1273 |
-
| 0.0398 | 2250 | 0.4934 |
|
1274 |
-
| 0.0406 | 2300 | 0.5354 |
|
1275 |
-
| 0.0415 | 2350 | 0.5236 |
|
1276 |
-
| 0.0424 | 2400 | 0.5057 |
|
1277 |
-
| 0.0433 | 2450 | 0.5448 |
|
1278 |
-
| 0.0442 | 2500 | 0.5418 |
|
1279 |
-
| 0.0451 | 2550 | 0.5944 |
|
1280 |
-
| 0.0459 | 2600 | 0.4945 |
|
1281 |
-
| 0.0468 | 2650 | 0.5225 |
|
1282 |
-
| 0.0477 | 2700 | 0.5661 |
|
1283 |
-
| 0.0486 | 2750 | 0.6125 |
|
1284 |
-
| 0.0495 | 2800 | 0.5105 |
|
1285 |
-
| 0.0504 | 2850 | 0.5893 |
|
1286 |
-
| 0.0512 | 2900 | 0.545 |
|
1287 |
-
| 0.0521 | 2950 | 0.5299 |
|
1288 |
-
| 0.0530 | 3000 | 0.5186 |
|
1289 |
-
| 0.0539 | 3050 | 0.5259 |
|
1290 |
-
| 0.0548 | 3100 | 0.6663 |
|
1291 |
-
| 0.0557 | 3150 | 0.5152 |
|
1292 |
-
| 0.0565 | 3200 | 0.5417 |
|
1293 |
-
| 0.0574 | 3250 | 0.5039 |
|
1294 |
-
| 0.0583 | 3300 | 0.4647 |
|
1295 |
-
| 0.0592 | 3350 | 0.4652 |
|
1296 |
-
| 0.0601 | 3400 | 0.4914 |
|
1297 |
-
| 0.0610 | 3450 | 0.5449 |
|
1298 |
-
| 0.0618 | 3500 | 0.5072 |
|
1299 |
-
| 0.0627 | 3550 | 0.5639 |
|
1300 |
-
| 0.0636 | 3600 | 0.4729 |
|
1301 |
-
| 0.0645 | 3650 | 0.6047 |
|
1302 |
-
| 0.0654 | 3700 | 0.5369 |
|
1303 |
-
| 0.0663 | 3750 | 0.5014 |
|
1304 |
-
| 0.0671 | 3800 | 0.5128 |
|
1305 |
-
| 0.0680 | 3850 | 0.5345 |
|
1306 |
-
| 0.0689 | 3900 | 0.479 |
|
1307 |
-
| 0.0698 | 3950 | 0.5652 |
|
1308 |
-
| 0.0707 | 4000 | 0.5272 |
|
1309 |
-
| 0.0716 | 4050 | 0.4904 |
|
1310 |
-
| 0.0724 | 4100 | 0.4755 |
|
1311 |
-
| 0.0733 | 4150 | 0.4897 |
|
1312 |
-
| 0.0742 | 4200 | 0.4679 |
|
1313 |
-
| 0.0751 | 4250 | 0.4712 |
|
1314 |
-
| 0.0760 | 4300 | 0.4779 |
|
1315 |
-
| 0.0769 | 4350 | 0.4463 |
|
1316 |
-
| 0.0778 | 4400 | 0.4917 |
|
1317 |
-
| 0.0786 | 4450 | 0.5572 |
|
1318 |
-
| 0.0795 | 4500 | 0.4553 |
|
1319 |
-
| 0.0804 | 4550 | 0.4598 |
|
1320 |
-
| 0.0813 | 4600 | 0.5445 |
|
1321 |
-
| 0.0822 | 4650 | 0.4537 |
|
1322 |
-
| 0.0831 | 4700 | 0.5158 |
|
1323 |
-
| 0.0839 | 4750 | 0.4598 |
|
1324 |
-
| 0.0848 | 4800 | 0.4662 |
|
1325 |
-
| 0.0857 | 4850 | 0.459 |
|
1326 |
-
| 0.0866 | 4900 | 0.4275 |
|
1327 |
-
| 0.0875 | 4950 | 0.5575 |
|
1328 |
-
| 0.0884 | 5000 | 0.4197 |
|
1329 |
-
| 0.0892 | 5050 | 0.4525 |
|
1330 |
-
| 0.0901 | 5100 | 0.4469 |
|
1331 |
-
| 0.0910 | 5150 | 0.5283 |
|
1332 |
-
| 0.0919 | 5200 | 0.4826 |
|
1333 |
-
| 0.0928 | 5250 | 0.3895 |
|
1334 |
-
| 0.0937 | 5300 | 0.4873 |
|
1335 |
-
| 0.0945 | 5350 | 0.4895 |
|
1336 |
-
| 0.0954 | 5400 | 0.4686 |
|
1337 |
|
1338 |
-
</details>
|
1339 |
|
1340 |
### Framework Versions
|
1341 |
- Python: 3.12.11
|
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
+
# tensor([[1.0000, 0.6467, 0.1007],
|
290 |
+
# [0.6467, 1.0000, 0.1513],
|
291 |
+
# [0.1007, 0.1513, 1.0000]])
|
292 |
```
|
293 |
|
294 |
<!--
|
|
|
1222 |
</details>
|
1223 |
|
1224 |
### Training Logs
|
|
|
|
|
1225 |
| Epoch | Step | Training Loss |
|
1226 |
|:------:|:----:|:-------------:|
|
1227 |
+
| 0.0963 | 5450 | 0.4723 |
|
1228 |
+
| 0.0972 | 5500 | 0.5258 |
|
1229 |
+
| 0.0981 | 5550 | 0.4851 |
|
1230 |
+
| 0.0990 | 5600 | 0.5311 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1231 |
|
|
|
1232 |
|
1233 |
### Framework Versions
|
1234 |
- Python: 3.12.11
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eea0a03b98d68b9630385779c8a8c7e0b7214aab3d11108ff4a7dee8e11165e6
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609611
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5970c5e57ca0ba681567ef6a04ad48dff25e78f70e37dc99bc3e34cce512849
|
3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cd91aca617b42354ae34eacd85892c172326b446d2385efb45fce4e2caab955
|
3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1383
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a63ba06faceabc247ab7f188765454cd84997a1b39e86a5bcd485fb8077cf90a
|
3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28df4acdedad4efa6e4629443faf92ab7af434a0b458c6b8097068d89b1dbe79
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -764,6 +764,34 @@
|
|
764 |
"learning_rate": 4.769434628975265e-05,
|
765 |
"loss": 0.4686,
|
766 |
"step": 5400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
767 |
}
|
768 |
],
|
769 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.09895566432825008,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 5600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
764 |
"learning_rate": 4.769434628975265e-05,
|
765 |
"loss": 0.4686,
|
766 |
"step": 5400
|
767 |
+
},
|
768 |
+
{
|
769 |
+
"epoch": 0.09630506617660052,
|
770 |
+
"grad_norm": 2.2319257259368896,
|
771 |
+
"learning_rate": 4.8136042402826856e-05,
|
772 |
+
"loss": 0.4723,
|
773 |
+
"step": 5450
|
774 |
+
},
|
775 |
+
{
|
776 |
+
"epoch": 0.09718859889381704,
|
777 |
+
"grad_norm": 2.2340879440307617,
|
778 |
+
"learning_rate": 4.857773851590106e-05,
|
779 |
+
"loss": 0.5258,
|
780 |
+
"step": 5500
|
781 |
+
},
|
782 |
+
{
|
783 |
+
"epoch": 0.09807213161103355,
|
784 |
+
"grad_norm": 3.2808139324188232,
|
785 |
+
"learning_rate": 4.901943462897527e-05,
|
786 |
+
"loss": 0.4851,
|
787 |
+
"step": 5550
|
788 |
+
},
|
789 |
+
{
|
790 |
+
"epoch": 0.09895566432825008,
|
791 |
+
"grad_norm": 2.4828484058380127,
|
792 |
+
"learning_rate": 4.946113074204947e-05,
|
793 |
+
"loss": 0.5311,
|
794 |
+
"step": 5600
|
795 |
}
|
796 |
],
|
797 |
"logging_steps": 50,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6097
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86bf969bd16bf2be7e7bcd2ab22f5624b04e88153ec953d1f322a51c17cc5ce6
|
3 |
size 6097
|