SystemAdmin123 committed on
Commit
2c4e840
·
verified ·
1 Parent(s): 797ffc1

Training in progress, step 560, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc81f761cfc8bdce5d4645d596275ebae46f5b388451e19e4c8d7151b84c9715
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4b652f70bb7bf6937fc33a7d01c3f03c6342b239ff26ece13523d743f26cd8
3
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7da038dd672630df7e600441c36fa83e824a9e2fb99b73b2420909cbb2ad6eb
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9bd27753d862a088993cd46574f25b05c2cda11093f946598dc46956f484d9
3
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92e5d26c381151ab0de50104b9e5ce82eabe494b836bd9ed820acb0c36701679
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed860de1deed7cdc39bd811790ea63ae1b083edeca3b67d013613d748b62c05e
3
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a091b0c39bd9b27ef31e5a41bbc9472016dcf757f5df52524848568afc94178
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a69028f8407cef8ebd9eb20ad7eb1ceb93e80717dfdef9783a0b74dc25525cac
3
  size 1168138808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1e282f5d03b3ef029829e361590621b745fcf0f26c09b36123475b88e1779ed
3
  size 16311821444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941b6102c0e182d11f8867f5507451850e6f35a383ea6b5af7bb2b7ef99c6f80
3
  size 16311821444
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:631bcc7b2b54adc0f8c13ccb364aeb8eebda651c6ce4485857a6aba1f8d727b5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7de5eba3aed7d95f28e0f9dfb9375531a0dbaceef85a8671b534835ab0e70865
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e6669863251bb7520c41ff8dcf5a27af1ba81a55e01b68c96831c6305cf7b27
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17839f2358f75951188e6934b7a52a31bb75248661d49e4c6ce09414906fd23
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.13450594930160373,
5
  "eval_steps": 20,
6
- "global_step": 520,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -587,6 +587,50 @@
587
  "eval_samples_per_second": 8.776,
588
  "eval_steps_per_second": 4.388,
589
  "step": 520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  }
591
  ],
592
  "logging_steps": 10,
@@ -606,7 +650,7 @@
606
  "attributes": {}
607
  }
608
  },
609
- "total_flos": 9.590933494431744e+16,
610
  "train_batch_size": 2,
611
  "trial_name": null,
612
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.14485256078634248,
5
  "eval_steps": 20,
6
+ "global_step": 560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
587
  "eval_samples_per_second": 8.776,
588
  "eval_steps_per_second": 4.388,
589
  "step": 520
590
+ },
591
+ {
592
+ "epoch": 0.1370926021727884,
593
+ "grad_norm": 15.1875,
594
+ "learning_rate": 0.00018598991661526572,
595
+ "loss": 3.0777,
596
+ "step": 530
597
+ },
598
+ {
599
+ "epoch": 0.1396792550439731,
600
+ "grad_norm": 25.0,
601
+ "learning_rate": 0.00018530718407223974,
602
+ "loss": 3.3325,
603
+ "step": 540
604
+ },
605
+ {
606
+ "epoch": 0.1396792550439731,
607
+ "eval_loss": 2.90164852142334,
608
+ "eval_runtime": 106.0394,
609
+ "eval_samples_per_second": 8.11,
610
+ "eval_steps_per_second": 4.055,
611
+ "step": 540
612
+ },
613
+ {
614
+ "epoch": 0.14226590791515778,
615
+ "grad_norm": 13.5625,
616
+ "learning_rate": 0.00018460952524209355,
617
+ "loss": 2.5467,
618
+ "step": 550
619
+ },
620
+ {
621
+ "epoch": 0.14485256078634248,
622
+ "grad_norm": 13.125,
623
+ "learning_rate": 0.00018389706219492147,
624
+ "loss": 3.2562,
625
+ "step": 560
626
+ },
627
+ {
628
+ "epoch": 0.14485256078634248,
629
+ "eval_loss": 2.854039192199707,
630
+ "eval_runtime": 99.183,
631
+ "eval_samples_per_second": 8.671,
632
+ "eval_steps_per_second": 4.335,
633
+ "step": 560
634
  }
635
  ],
636
  "logging_steps": 10,
 
650
  "attributes": {}
651
  }
652
  },
653
+ "total_flos": 1.0328697609388032e+17,
654
  "train_batch_size": 2,
655
  "trial_name": null,
656
  "trial_params": null