guyhadad01 commited on
Commit
aafbdde
·
verified ·
1 Parent(s): f163735

Training in progress, step 5600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
286
  # Get the similarity scores for the embeddings
287
  similarities = model.similarity(embeddings, embeddings)
288
  print(similarities)
289
- # tensor([[1.0000, 0.6268, 0.1112],
290
- # [0.6268, 1.0000, 0.1476],
291
- # [0.1112, 0.1476, 1.0000]])
292
  ```
293
 
294
  <!--
@@ -1222,120 +1222,13 @@ You can finetune this model on your own dataset.
1222
  </details>
1223
 
1224
  ### Training Logs
1225
- <details><summary>Click to expand</summary>
1226
-
1227
  | Epoch | Step | Training Loss |
1228
  |:------:|:----:|:-------------:|
1229
- | 0.0009 | 50 | 1.3738 |
1230
- | 0.0018 | 100 | 1.1661 |
1231
- | 0.0027 | 150 | 1.2107 |
1232
- | 0.0035 | 200 | 0.9855 |
1233
- | 0.0044 | 250 | 0.9431 |
1234
- | 0.0053 | 300 | 0.8566 |
1235
- | 0.0062 | 350 | 0.8697 |
1236
- | 0.0071 | 400 | 0.8099 |
1237
- | 0.0080 | 450 | 0.766 |
1238
- | 0.0088 | 500 | 0.7402 |
1239
- | 0.0097 | 550 | 0.8769 |
1240
- | 0.0106 | 600 | 0.6827 |
1241
- | 0.0115 | 650 | 0.7252 |
1242
- | 0.0124 | 700 | 0.7701 |
1243
- | 0.0133 | 750 | 0.7525 |
1244
- | 0.0141 | 800 | 0.7097 |
1245
- | 0.0150 | 850 | 0.7575 |
1246
- | 0.0159 | 900 | 0.6899 |
1247
- | 0.0168 | 950 | 0.6655 |
1248
- | 0.0177 | 1000 | 0.6601 |
1249
- | 0.0186 | 1050 | 0.6925 |
1250
- | 0.0194 | 1100 | 0.6169 |
1251
- | 0.0203 | 1150 | 0.6367 |
1252
- | 0.0212 | 1200 | 0.615 |
1253
- | 0.0221 | 1250 | 0.6869 |
1254
- | 0.0230 | 1300 | 0.7527 |
1255
- | 0.0239 | 1350 | 0.6124 |
1256
- | 0.0247 | 1400 | 0.6511 |
1257
- | 0.0256 | 1450 | 0.7047 |
1258
- | 0.0265 | 1500 | 0.6639 |
1259
- | 0.0274 | 1550 | 0.6795 |
1260
- | 0.0283 | 1600 | 0.6341 |
1261
- | 0.0292 | 1650 | 0.6031 |
1262
- | 0.0300 | 1700 | 0.5959 |
1263
- | 0.0309 | 1750 | 0.6412 |
1264
- | 0.0318 | 1800 | 0.5065 |
1265
- | 0.0327 | 1850 | 0.5611 |
1266
- | 0.0336 | 1900 | 0.5298 |
1267
- | 0.0345 | 1950 | 0.5759 |
1268
- | 0.0353 | 2000 | 0.5782 |
1269
- | 0.0362 | 2050 | 0.5221 |
1270
- | 0.0371 | 2100 | 0.5966 |
1271
- | 0.0380 | 2150 | 0.5989 |
1272
- | 0.0389 | 2200 | 0.5097 |
1273
- | 0.0398 | 2250 | 0.4934 |
1274
- | 0.0406 | 2300 | 0.5354 |
1275
- | 0.0415 | 2350 | 0.5236 |
1276
- | 0.0424 | 2400 | 0.5057 |
1277
- | 0.0433 | 2450 | 0.5448 |
1278
- | 0.0442 | 2500 | 0.5418 |
1279
- | 0.0451 | 2550 | 0.5944 |
1280
- | 0.0459 | 2600 | 0.4945 |
1281
- | 0.0468 | 2650 | 0.5225 |
1282
- | 0.0477 | 2700 | 0.5661 |
1283
- | 0.0486 | 2750 | 0.6125 |
1284
- | 0.0495 | 2800 | 0.5105 |
1285
- | 0.0504 | 2850 | 0.5893 |
1286
- | 0.0512 | 2900 | 0.545 |
1287
- | 0.0521 | 2950 | 0.5299 |
1288
- | 0.0530 | 3000 | 0.5186 |
1289
- | 0.0539 | 3050 | 0.5259 |
1290
- | 0.0548 | 3100 | 0.6663 |
1291
- | 0.0557 | 3150 | 0.5152 |
1292
- | 0.0565 | 3200 | 0.5417 |
1293
- | 0.0574 | 3250 | 0.5039 |
1294
- | 0.0583 | 3300 | 0.4647 |
1295
- | 0.0592 | 3350 | 0.4652 |
1296
- | 0.0601 | 3400 | 0.4914 |
1297
- | 0.0610 | 3450 | 0.5449 |
1298
- | 0.0618 | 3500 | 0.5072 |
1299
- | 0.0627 | 3550 | 0.5639 |
1300
- | 0.0636 | 3600 | 0.4729 |
1301
- | 0.0645 | 3650 | 0.6047 |
1302
- | 0.0654 | 3700 | 0.5369 |
1303
- | 0.0663 | 3750 | 0.5014 |
1304
- | 0.0671 | 3800 | 0.5128 |
1305
- | 0.0680 | 3850 | 0.5345 |
1306
- | 0.0689 | 3900 | 0.479 |
1307
- | 0.0698 | 3950 | 0.5652 |
1308
- | 0.0707 | 4000 | 0.5272 |
1309
- | 0.0716 | 4050 | 0.4904 |
1310
- | 0.0724 | 4100 | 0.4755 |
1311
- | 0.0733 | 4150 | 0.4897 |
1312
- | 0.0742 | 4200 | 0.4679 |
1313
- | 0.0751 | 4250 | 0.4712 |
1314
- | 0.0760 | 4300 | 0.4779 |
1315
- | 0.0769 | 4350 | 0.4463 |
1316
- | 0.0778 | 4400 | 0.4917 |
1317
- | 0.0786 | 4450 | 0.5572 |
1318
- | 0.0795 | 4500 | 0.4553 |
1319
- | 0.0804 | 4550 | 0.4598 |
1320
- | 0.0813 | 4600 | 0.5445 |
1321
- | 0.0822 | 4650 | 0.4537 |
1322
- | 0.0831 | 4700 | 0.5158 |
1323
- | 0.0839 | 4750 | 0.4598 |
1324
- | 0.0848 | 4800 | 0.4662 |
1325
- | 0.0857 | 4850 | 0.459 |
1326
- | 0.0866 | 4900 | 0.4275 |
1327
- | 0.0875 | 4950 | 0.5575 |
1328
- | 0.0884 | 5000 | 0.4197 |
1329
- | 0.0892 | 5050 | 0.4525 |
1330
- | 0.0901 | 5100 | 0.4469 |
1331
- | 0.0910 | 5150 | 0.5283 |
1332
- | 0.0919 | 5200 | 0.4826 |
1333
- | 0.0928 | 5250 | 0.3895 |
1334
- | 0.0937 | 5300 | 0.4873 |
1335
- | 0.0945 | 5350 | 0.4895 |
1336
- | 0.0954 | 5400 | 0.4686 |
1337
 
1338
- </details>
1339
 
1340
  ### Framework Versions
1341
  - Python: 3.12.11
 
286
  # Get the similarity scores for the embeddings
287
  similarities = model.similarity(embeddings, embeddings)
288
  print(similarities)
289
+ # tensor([[1.0000, 0.6467, 0.1007],
290
+ # [0.6467, 1.0000, 0.1513],
291
+ # [0.1007, 0.1513, 1.0000]])
292
  ```
293
 
294
  <!--
 
1222
  </details>
1223
 
1224
  ### Training Logs
 
 
1225
  | Epoch | Step | Training Loss |
1226
  |:------:|:----:|:-------------:|
1227
+ | 0.0963 | 5450 | 0.4723 |
1228
+ | 0.0972 | 5500 | 0.5258 |
1229
+ | 0.0981 | 5550 | 0.4851 |
1230
+ | 0.0990 | 5600 | 0.5311 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1231
 
 
1232
 
1233
  ### Framework Versions
1234
  - Python: 3.12.11
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a7b3038038d614d02d13200e3426ce1cee9998e3dd720f3ef8373fd32bc320c
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea0a03b98d68b9630385779c8a8c7e0b7214aab3d11108ff4a7dee8e11165e6
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da23c5a242ed1a18ccfa540abf270f8af47081a60b90b0528855f1e52bf8ac00
3
  size 180609611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5970c5e57ca0ba681567ef6a04ad48dff25e78f70e37dc99bc3e34cce512849
3
  size 180609611
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43b1cabcf4d622f4eeb0e2e256d17e427254b320b0d958e0925925fc3965ffc2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd91aca617b42354ae34eacd85892c172326b446d2385efb45fce4e2caab955
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93d3edcb87fa33fd6a7048f57178907c582fac7657ef4f6ed7cd1a489f3a9b01
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63ba06faceabc247ab7f188765454cd84997a1b39e86a5bcd485fb8077cf90a
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde96e015f065de3d9994d1a7735820057fec16cad19c67655b731af60c92ecc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28df4acdedad4efa6e4629443faf92ab7af434a0b458c6b8097068d89b1dbe79
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.095421533459384,
6
  "eval_steps": 500,
7
- "global_step": 5400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -764,6 +764,34 @@
764
  "learning_rate": 4.769434628975265e-05,
765
  "loss": 0.4686,
766
  "step": 5400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  }
768
  ],
769
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.09895566432825008,
6
  "eval_steps": 500,
7
+ "global_step": 5600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
764
  "learning_rate": 4.769434628975265e-05,
765
  "loss": 0.4686,
766
  "step": 5400
767
+ },
768
+ {
769
+ "epoch": 0.09630506617660052,
770
+ "grad_norm": 2.2319257259368896,
771
+ "learning_rate": 4.8136042402826856e-05,
772
+ "loss": 0.4723,
773
+ "step": 5450
774
+ },
775
+ {
776
+ "epoch": 0.09718859889381704,
777
+ "grad_norm": 2.2340879440307617,
778
+ "learning_rate": 4.857773851590106e-05,
779
+ "loss": 0.5258,
780
+ "step": 5500
781
+ },
782
+ {
783
+ "epoch": 0.09807213161103355,
784
+ "grad_norm": 3.2808139324188232,
785
+ "learning_rate": 4.901943462897527e-05,
786
+ "loss": 0.4851,
787
+ "step": 5550
788
+ },
789
+ {
790
+ "epoch": 0.09895566432825008,
791
+ "grad_norm": 2.4828484058380127,
792
+ "learning_rate": 4.946113074204947e-05,
793
+ "loss": 0.5311,
794
+ "step": 5600
795
  }
796
  ],
797
  "logging_steps": 50,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5a2600b2df7b8aa86e9a368609e29900445ebc9cdf1eaf37dde9ffa01ded762
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86bf969bd16bf2be7e7bcd2ab22f5624b04e88153ec953d1f322a51c17cc5ce6
3
  size 6097