guyhadad01 committed on
Commit
fc8a6a5
·
verified ·
1 Parent(s): d0d5868

Training in progress, step 5200, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
286
  # Get the similarity scores for the embeddings
287
  similarities = model.similarity(embeddings, embeddings)
288
  print(similarities)
289
- # tensor([[1.0000, 0.6188, 0.1217],
290
- # [0.6188, 1.0000, 0.1507],
291
- # [0.1217, 0.1507, 1.0000]])
292
  ```
293
 
294
  <!--
@@ -1222,6 +1222,8 @@ You can finetune this model on your own dataset.
1222
  </details>
1223
 
1224
  ### Training Logs
 
 
1225
  | Epoch | Step | Training Loss |
1226
  |:------:|:----:|:-------------:|
1227
  | 0.0009 | 50 | 1.3738 |
@@ -1324,7 +1326,12 @@ You can finetune this model on your own dataset.
1324
  | 0.0866 | 4900 | 0.4275 |
1325
  | 0.0875 | 4950 | 0.5575 |
1326
  | 0.0884 | 5000 | 0.4197 |
 
 
 
 
1327
 
 
1328
 
1329
  ### Framework Versions
1330
  - Python: 3.12.11
 
286
  # Get the similarity scores for the embeddings
287
  similarities = model.similarity(embeddings, embeddings)
288
  print(similarities)
289
+ # tensor([[1.0000, 0.6341, 0.1285],
290
+ # [0.6341, 1.0000, 0.1635],
291
+ # [0.1285, 0.1635, 1.0000]])
292
  ```
293
 
294
  <!--
 
1222
  </details>
1223
 
1224
  ### Training Logs
1225
+ <details><summary>Click to expand</summary>
1226
+
1227
  | Epoch | Step | Training Loss |
1228
  |:------:|:----:|:-------------:|
1229
  | 0.0009 | 50 | 1.3738 |
 
1326
  | 0.0866 | 4900 | 0.4275 |
1327
  | 0.0875 | 4950 | 0.5575 |
1328
  | 0.0884 | 5000 | 0.4197 |
1329
+ | 0.0892 | 5050 | 0.4525 |
1330
+ | 0.0901 | 5100 | 0.4469 |
1331
+ | 0.0910 | 5150 | 0.5283 |
1332
+ | 0.0919 | 5200 | 0.4826 |
1333
 
1334
+ </details>
1335
 
1336
  ### Framework Versions
1337
  - Python: 3.12.11
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90e4d2bbf59fbcb40ff4b4462a436528ececec21cfbe639ba0aa1880e8a048b9
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af90c7f23b49e51d466a6d8a0d86939bf37e4d8ea5c0b655ab35b7c151a05cf
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498b25c3a0cdf659b68bca58a1fe81b240dadbd1c0686b5aa30fdf8a0f4407de
3
  size 180609611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544baf7d60ab6e318f100fc6c2fccfb140e19ae226f8f7d820d236577b1da105
3
  size 180609611
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbd110dd6b99c908a73a401be3d8438b48f355d5cb710bfb9c01fca2894f5f8e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11c0cb9bd2c9c6c5c964ae0bb4e4e4872958c4dc97ac96f40f1c118dec4c9803
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82ad8990572ad11a824b7db276c8af49c179ca7e7724b4e6906cd0ae480a80a8
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb655591367e732af512c0b489f6652e710a205ef4e8286da8729a948980ee0
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77d3a1390f85cf6329aeb072176eb6782a1d00623e0775f00f722a024157ca78
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407fdddb74e20b064ec7452aeeae963d95b777da5e4cde56c489f916bcf701b2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08835327172165185,
6
  "eval_steps": 500,
7
- "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -708,6 +708,34 @@
708
  "learning_rate": 4.4160777385159016e-05,
709
  "loss": 0.4197,
710
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  }
712
  ],
713
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.09188740259051793,
6
  "eval_steps": 500,
7
+ "global_step": 5200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
708
  "learning_rate": 4.4160777385159016e-05,
709
  "loss": 0.4197,
710
  "step": 5000
711
+ },
712
+ {
713
+ "epoch": 0.08923680443886838,
714
+ "grad_norm": 1.8962676525115967,
715
+ "learning_rate": 4.4602473498233214e-05,
716
+ "loss": 0.4525,
717
+ "step": 5050
718
+ },
719
+ {
720
+ "epoch": 0.09012033715608489,
721
+ "grad_norm": 2.1373822689056396,
722
+ "learning_rate": 4.5044169611307425e-05,
723
+ "loss": 0.4469,
724
+ "step": 5100
725
+ },
726
+ {
727
+ "epoch": 0.0910038698733014,
728
+ "grad_norm": 5.542126178741455,
729
+ "learning_rate": 4.548586572438163e-05,
730
+ "loss": 0.5283,
731
+ "step": 5150
732
+ },
733
+ {
734
+ "epoch": 0.09188740259051793,
735
+ "grad_norm": 2.4414310455322266,
736
+ "learning_rate": 4.5927561837455834e-05,
737
+ "loss": 0.4826,
738
+ "step": 5200
739
  }
740
  ],
741
  "logging_steps": 50,