guyhadad01 committed on
Commit 5d22b26 · verified · 1 Parent(s): 334c15c

Training in progress, step 8400, checkpoint

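Commits like this one are generated automatically by the trainer's Hub integration: with `hub_strategy: checkpoint` (visible in the training arguments in the README diff below), the `last-checkpoint/` folder is pushed to the Hub on every save. A minimal sketch of such a setup follows; the base model, the toy dataset, and the 200-step save interval are assumptions, while `MultipleNegativesRankingLoss` with `scale=20.0`, `logging_steps: 50`, and `hub_strategy: checkpoint` are taken from this diff.

```python
# Hypothetical sketch of a training setup that produces these automatic
# "Training in progress, step N, checkpoint" commits. The base model, dataset,
# and save interval are assumptions; the loss class, its scale, logging_steps,
# and hub_strategy match values shown in this diff.
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # assumed base model

# Stand-in for the real 14,483,470-pair training set referenced in the README.
train_dataset = Dataset.from_dict({
    "anchor": ["What is the capital of France?"],
    "positive": ["Paris is the capital of France."],
})

# In-batch-negatives ranking loss; scale=20.0 matches the loss config in the README diff.
loss = MultipleNegativesRankingLoss(model, scale=20.0)

args = SentenceTransformerTrainingArguments(
    output_dir="outputs",
    logging_steps=50,           # matches `logging_steps: 50` in trainer_state.json
    save_steps=200,             # assumed: checkpoints appear at steps 8200, 8400, ...
    push_to_hub=True,
    hub_strategy="checkpoint",  # push last-checkpoint/ with every save, as in this commit
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
```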
last-checkpoint/1_Pooling/config.json CHANGED
@@ -1,10 +1,10 @@
{
- "word_embedding_dimension": 384,
- "pooling_mode_cls_token": false,
- "pooling_mode_mean_tokens": true,
- "pooling_mode_max_tokens": false,
- "pooling_mode_mean_sqrt_len_tokens": false,
- "pooling_mode_weightedmean_tokens": false,
- "pooling_mode_lasttoken": false,
- "include_prompt": true
+ "word_embedding_dimension": 384,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false,
+ "pooling_mode_weightedmean_tokens": false,
+ "pooling_mode_lasttoken": false,
+ "include_prompt": true
}
last-checkpoint/README.md CHANGED
@@ -3,7 +3,6 @@ tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- - dense
- generated_from_trainer
- dataset_size:14483470
- loss:MultipleNegativesRankingLoss
@@ -251,7 +250,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s

```
SentenceTransformer(
- (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
+ (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
@@ -285,10 +284,8 @@ print(embeddings.shape)

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
- print(similarities)
- # tensor([[1.0000, 0.7055, 0.1480],
- #         [0.7055, 1.0000, 0.1624],
- #         [0.1480, 0.1624, 1.0000]])
+ print(similarities.shape)
+ # [3, 3]
```

<!--
@@ -352,8 +349,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -379,8 +375,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -406,8 +401,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -433,8 +427,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -460,8 +453,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -487,8 +479,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -514,8 +505,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -541,8 +531,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -568,8 +557,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -595,8 +583,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -622,8 +609,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -649,8 +635,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -676,8 +661,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -703,8 +687,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -730,8 +713,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -757,8 +739,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -784,8 +765,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -811,8 +791,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -838,8 +817,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -865,8 +843,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -892,8 +869,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -919,8 +895,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -946,8 +921,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -973,8 +947,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -1000,8 +973,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -1027,8 +999,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -1054,8 +1025,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -1081,8 +1051,7 @@ You can finetune this model on your own dataset.
```json
{
    "scale": 20.0,
- "similarity_fct": "cos_sim",
- "gather_across_devices": false
+ "similarity_fct": "cos_sim"
}
```
</details>
@@ -1167,7 +1136,7 @@ You can finetune this model on your own dataset.
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- - `optim`: adamw_torch_fused
+ - `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
@@ -1185,7 +1154,6 @@ You can finetune this model on your own dataset.
- `hub_strategy`: checkpoint
- `hub_private_repo`: None
- `hub_always_push`: False
- - `hub_revision`: None
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
@@ -1210,86 +1178,31 @@ You can finetune this model on your own dataset.
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- - `liger_kernel_config`: None
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional
- - `router_mapping`: {}
- - `learning_rate_mapping`: {}

</details>

### Training Logs
| Epoch | Step | Training Loss |
|:------:|:----:|:-------------:|
- | 0.0963 | 5450 | 0.4723 |
- | 0.0972 | 5500 | 0.5258 |
- | 0.0981 | 5550 | 0.4851 |
- | 0.0990 | 5600 | 0.5311 |
- | 0.0998 | 5650 | 0.411 |
- | 0.1007 | 5700 | 0.4184 |
- | 0.1016 | 5750 | 0.4071 |
- | 0.1025 | 5800 | 0.4712 |
- | 0.1034 | 5850 | 0.4912 |
- | 0.1043 | 5900 | 0.5589 |
- | 0.1051 | 5950 | 0.4507 |
- | 0.1060 | 6000 | 0.5429 |
- | 0.1069 | 6050 | 0.3789 |
- | 0.1078 | 6100 | 0.3949 |
- | 0.1087 | 6150 | 0.4491 |
- | 0.1096 | 6200 | 0.435 |
- | 0.1104 | 6250 | 0.3865 |
- | 0.1113 | 6300 | 0.4175 |
- | 0.1122 | 6350 | 0.4387 |
- | 0.1131 | 6400 | 0.4554 |
- | 0.1140 | 6450 | 0.581 |
- | 0.1149 | 6500 | 0.4746 |
- | 0.1157 | 6550 | 0.4511 |
- | 0.1166 | 6600 | 0.3871 |
- | 0.1175 | 6650 | 0.449 |
- | 0.1184 | 6700 | 0.3458 |
- | 0.1193 | 6750 | 0.4791 |
- | 0.1202 | 6800 | 0.4445 |
- | 0.1210 | 6850 | 0.4907 |
- | 0.1219 | 6900 | 0.5377 |
- | 0.1228 | 6950 | 0.5275 |
- | 0.1237 | 7000 | 0.5489 |
- | 0.1246 | 7050 | 0.3931 |
- | 0.1255 | 7100 | 0.5155 |
- | 0.1263 | 7150 | 0.4282 |
- | 0.1272 | 7200 | 0.4639 |
- | 0.1281 | 7250 | 0.5294 |
- | 0.1290 | 7300 | 0.4121 |
- | 0.1299 | 7350 | 0.4139 |
- | 0.1308 | 7400 | 0.4311 |
- | 0.1316 | 7450 | 0.4025 |
- | 0.1325 | 7500 | 0.4791 |
- | 0.1334 | 7550 | 0.4498 |
- | 0.1343 | 7600 | 0.4538 |
- | 0.1352 | 7650 | 0.4596 |
- | 0.1361 | 7700 | 0.3508 |
- | 0.1369 | 7750 | 0.5145 |
- | 0.1378 | 7800 | 0.3505 |
- | 0.1387 | 7850 | 0.3354 |
- | 0.1396 | 7900 | 0.4474 |
- | 0.1405 | 7950 | 0.3524 |
- | 0.1414 | 8000 | 0.4957 |
- | 0.1422 | 8050 | 0.4461 |
- | 0.1431 | 8100 | 0.4983 |
- | 0.1440 | 8150 | 0.4535 |
- | 0.1449 | 8200 | 0.3668 |
+ | 0.1458 | 8250 | 0.4688 |
+ | 0.1467 | 8300 | 0.3967 |
+ | 0.1475 | 8350 | 0.4911 |
+ | 0.1484 | 8400 | 0.4076 |


### Framework Versions
- - Python: 3.12.11
- - Sentence Transformers: 5.1.0
- - Transformers: 4.55.2
- - PyTorch: 2.8.0+cu126
- - Accelerate: 1.10.0
- - Datasets: 4.0.0
- - Tokenizers: 0.21.4
+ - Python: 3.11.13
+ - Sentence Transformers: 4.1.0
+ - Transformers: 4.52.4
+ - PyTorch: 2.6.0+cu124
+ - Accelerate: 1.8.1
+ - Datasets: 3.6.0
+ - Tokenizers: 0.21.2

## Citation

last-checkpoint/config.json CHANGED
@@ -18,7 +18,7 @@
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
- "transformers_version": "4.55.2",
+ "transformers_version": "4.52.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
last-checkpoint/config_sentence_transformers.json CHANGED
@@ -1,14 +1,10 @@
{
  "__version__": {
- "sentence_transformers": "5.1.0",
- "transformers": "4.55.2",
- "pytorch": "2.8.0+cu126"
- },
- "model_type": "SentenceTransformer",
- "prompts": {
- "query": "",
- "document": ""
+ "sentence_transformers": "4.1.0",
+ "transformers": "4.52.4",
+ "pytorch": "2.6.0+cu124"
  },
+ "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:4640bbfebbb11ecea082ff1af6d855d052c0c8e1bdf9168d4f7359599f62aabd
+ oid sha256:d4889319c3ce656b4822f1a50aac0284145a17b11d2f3ae21fabab4cf0583b0e
size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:ccd29683807f5e0218e2f5d58ca633b3fdf6424d5b06a7fd7dd1632307029b7e
- size 180609611
+ oid sha256:ef06c105348a65b7eed64942f5dea6f43606f7f0ef5b2b0bde7930b99c26cac5
+ size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:eae3e1579fe71c3a791c337c98621066f7781ea53baa1bbd326d61e1273a7011
- size 14645
+ oid sha256:b9d487123f633c1d3c5f62a2fbd2d4a814f21aa207d5180464abde74c25ff991
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:e1b7589e0e51e5ea2c39973809a8f993519f86ffc0bc8e5dc5baeb72e06aa3bb
- size 1383
+ oid sha256:899a642947e70d7e60cf2eeee96b1e912dfb435fbf6e019478dc646f870906c8
+ size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:71575ac6af3608ee2ab21516b68821ed7b385bc77900a19bfec38f30c0387852
- size 1465
+ oid sha256:9d81eef3cd79f288260f908a6802f0ede97e49007282e07a31f47a48189a183b
+ size 1064
last-checkpoint/sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
{
- "max_seq_length": 256,
- "do_lower_case": false
+ "max_seq_length": 256,
+ "do_lower_case": false
}
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.14489936562350905,
+ "epoch": 0.1484334964923751,
  "eval_steps": 500,
- "global_step": 8200,
+ "global_step": 8400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1156,6 +1156,34 @@
      "learning_rate": 4.750741198876913e-05,
      "loss": 0.3668,
      "step": 8200
+ },
+ {
+ "epoch": 0.14578289834072555,
+ "grad_norm": 1.9963476657867432,
+ "learning_rate": 4.7458325970430585e-05,
+ "loss": 0.4688,
+ "step": 8250
+ },
+ {
+ "epoch": 0.14666643105794208,
+ "grad_norm": 1.7402074337005615,
+ "learning_rate": 4.740923995209205e-05,
+ "loss": 0.3967,
+ "step": 8300
+ },
+ {
+ "epoch": 0.14754996377515858,
+ "grad_norm": 2.0074145793914795,
+ "learning_rate": 4.736015393375351e-05,
+ "loss": 0.4911,
+ "step": 8350
+ },
+ {
+ "epoch": 0.1484334964923751,
+ "grad_norm": 1.7804876565933228,
+ "learning_rate": 4.731106791541497e-05,
+ "loss": 0.4076,
+ "step": 8400
    }
  ],
  "logging_steps": 50,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:86bf969bd16bf2be7e7bcd2ab22f5624b04e88153ec953d1f322a51c17cc5ce6
- size 6097
+ oid sha256:764686e45491e3136468635b2d7d30b214689849603924e41f88b81ff0c3a1d0
+ size 5560