SystemAdmin123 committed on
Commit
ce220e4
·
verified ·
1 Parent(s): 0e5468a

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:697f816d0621c0d00e8e0ca9fe72e8fdd524ca12acf84d2e3bd927115c5c70e9
3
  size 2433024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0bcec12c0a1fb6c8ef45fcba96ed788601918fd8ce201c4fd178411dc5bd310
3
  size 2433024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82e1d534960590164150b98a4eca7e5d48f9e8072cfbd3c56be0575735e0c5b5
3
  size 2498406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86cdaaa9d7dcd5919660434789db9112aa69866e9fe9c4a368f52f5724f61694
3
  size 2498406
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf1f53caaa12767db3c6df563992bbf88f4b84dc57ec5080b22deb9c2c56ec6e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b204b20cdc92a140e2e21e015bdaa04af008c00e0bde30e59edf0f23817a338
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7102693104468778,
5
  "eval_steps": 200,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1791,6 +1791,76 @@
1791
  "eval_samples_per_second": 40.85,
1792
  "eval_steps_per_second": 10.226,
1793
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1794
  }
1795
  ],
1796
  "logging_steps": 10,
@@ -1805,12 +1875,12 @@
1805
  "should_evaluate": false,
1806
  "should_log": false,
1807
  "should_save": true,
1808
- "should_training_stop": false
1809
  },
1810
  "attributes": {}
1811
  }
1812
  },
1813
- "total_flos": 235181703168.0,
1814
  "train_batch_size": 4,
1815
  "trial_name": null,
1816
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7398638650488311,
5
  "eval_steps": 200,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1791
  "eval_samples_per_second": 40.85,
1792
  "eval_steps_per_second": 10.226,
1793
  "step": 2400
1794
+ },
1795
+ {
1796
+ "epoch": 0.7132287659070731,
1797
+ "grad_norm": 0.498046875,
1798
+ "learning_rate": 7.078065009060941e-07,
1799
+ "loss": 10.4885,
1800
+ "step": 2410
1801
+ },
1802
+ {
1803
+ "epoch": 0.7161882213672685,
1804
+ "grad_norm": 0.50390625,
1805
+ "learning_rate": 5.593931933399854e-07,
1806
+ "loss": 10.5281,
1807
+ "step": 2420
1808
+ },
1809
+ {
1810
+ "epoch": 0.7191476768274637,
1811
+ "grad_norm": 0.78125,
1812
+ "learning_rate": 4.2837911263562404e-07,
1813
+ "loss": 10.4985,
1814
+ "step": 2430
1815
+ },
1816
+ {
1817
+ "epoch": 0.7221071322876591,
1818
+ "grad_norm": 0.6328125,
1819
+ "learning_rate": 3.1478718246357173e-07,
1820
+ "loss": 10.6174,
1821
+ "step": 2440
1822
+ },
1823
+ {
1824
+ "epoch": 0.7250665877478544,
1825
+ "grad_norm": 0.99609375,
1826
+ "learning_rate": 2.1863727812254653e-07,
1827
+ "loss": 10.7935,
1828
+ "step": 2450
1829
+ },
1830
+ {
1831
+ "epoch": 0.7280260432080498,
1832
+ "grad_norm": 0.51953125,
1833
+ "learning_rate": 1.3994622306173765e-07,
1834
+ "loss": 10.5046,
1835
+ "step": 2460
1836
+ },
1837
+ {
1838
+ "epoch": 0.730985498668245,
1839
+ "grad_norm": 0.484375,
1840
+ "learning_rate": 7.872778593728258e-08,
1841
+ "loss": 10.5646,
1842
+ "step": 2470
1843
+ },
1844
+ {
1845
+ "epoch": 0.7339449541284404,
1846
+ "grad_norm": 0.6953125,
1847
+ "learning_rate": 3.499267820307184e-08,
1848
+ "loss": 10.5978,
1849
+ "step": 2480
1850
+ },
1851
+ {
1852
+ "epoch": 0.7369044095886357,
1853
+ "grad_norm": 0.6484375,
1854
+ "learning_rate": 8.748552236603757e-09,
1855
+ "loss": 10.6151,
1856
+ "step": 2490
1857
+ },
1858
+ {
1859
+ "epoch": 0.7398638650488311,
1860
+ "grad_norm": 1.140625,
1861
+ "learning_rate": 0.0,
1862
+ "loss": 10.6938,
1863
+ "step": 2500
1864
  }
1865
  ],
1866
  "logging_steps": 10,
 
1875
  "should_evaluate": false,
1876
  "should_log": false,
1877
  "should_save": true,
1878
+ "should_training_stop": true
1879
  },
1880
  "attributes": {}
1881
  }
1882
  },
1883
+ "total_flos": 244972781568.0,
1884
  "train_batch_size": 4,
1885
  "trial_name": null,
1886
  "trial_params": null