rakhman-llm committed
Commit 817d0bd · verified · 1 Parent(s): 48d0f17

Training in progress, step 11500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6be50dd6f773aa1f48148cbd37fadbc53c1f82b20839270278b093c580fda84d
+oid sha256:ef77a091d99ff91eba75355ced068d82754fe09197e0fe3fb0024d4681e880cd
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7a8731c7a1753c2a934fea42dc42bc1494d508ed21f32dcb78a226e403709f0
+oid sha256:13d127c8a77b3a6f121a1e1766bf489c05dc85af0fb609f9d0474c53a6bbc073
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad1edd41eefae5337989de90c37428566177e0258f6ab839f3f0a3e2bcd645ce
+oid sha256:657e8f4565cffd7e4abf75f56b2fdcd3ae235671ae9b5c722c485957c12a53d9
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62568f1a18857cf8edd17d9d189f58f7644089636cf8dea79c190056990aaec9
+oid sha256:41e2a369098bdc7bbbac32b35b2de9650fe514b2c352f9f563c5554da15cddf2
 size 1064
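
The four files above are binary checkpoint artifacts tracked through Git LFS, so the diff only swaps the SHA-256 object id inside each pointer while the recorded sizes stay the same. As a small sketch (the local path is an assumption for illustration, not something recorded in this commit), a downloaded model.safetensors could be checked against the new pointer like this, in Python:

    import hashlib
    import os

    # Assumed local path to the downloaded checkpoint shard; adjust as needed.
    LOCAL_PATH = "last-checkpoint/model.safetensors"

    # Expected digest and size taken from the updated LFS pointer in this commit.
    EXPECTED_OID = "ef77a091d99ff91eba75355ced068d82754fe09197e0fe3fb0024d4681e880cd"
    EXPECTED_SIZE = 891558696

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file through SHA-256 so a large checkpoint never sits in RAM."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    if __name__ == "__main__":
        assert os.path.getsize(LOCAL_PATH) == EXPECTED_SIZE, "size mismatch vs. LFS pointer"
        assert sha256_of(LOCAL_PATH) == EXPECTED_OID, "sha256 mismatch vs. LFS pointer"
        print("checkpoint file matches its LFS pointer")

The same check applies to optimizer.pt, scheduler.pt, and rng_state.pth with their respective oid and size values.
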
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08220627158880234,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
-  "epoch": 1.76,
+  "best_metric": 0.08207839727401733,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-11500",
+  "epoch": 1.8399999999999999,
   "eval_steps": 500,
-  "global_step": 11000,
+  "global_step": 11500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1723,6 +1723,84 @@
       "eval_samples_per_second": 17.1,
       "eval_steps_per_second": 2.138,
       "step": 11000
+    },
+    {
+      "epoch": 1.768,
+      "grad_norm": 5123.0029296875,
+      "learning_rate": 1.232e-05,
+      "loss": 0.0562,
+      "step": 11050
+    },
+    {
+      "epoch": 1.776,
+      "grad_norm": 7975.41064453125,
+      "learning_rate": 1.224e-05,
+      "loss": 0.0515,
+      "step": 11100
+    },
+    {
+      "epoch": 1.784,
+      "grad_norm": 5846.47705078125,
+      "learning_rate": 1.216e-05,
+      "loss": 0.054,
+      "step": 11150
+    },
+    {
+      "epoch": 1.792,
+      "grad_norm": 7158.12109375,
+      "learning_rate": 1.2080000000000001e-05,
+      "loss": 0.0577,
+      "step": 11200
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 5405.5224609375,
+      "learning_rate": 1.2e-05,
+      "loss": 0.0538,
+      "step": 11250
+    },
+    {
+      "epoch": 1.808,
+      "grad_norm": 7155.9677734375,
+      "learning_rate": 1.192e-05,
+      "loss": 0.0539,
+      "step": 11300
+    },
+    {
+      "epoch": 1.8159999999999998,
+      "grad_norm": 6886.369140625,
+      "learning_rate": 1.184e-05,
+      "loss": 0.0565,
+      "step": 11350
+    },
+    {
+      "epoch": 1.8239999999999998,
+      "grad_norm": 7139.15283203125,
+      "learning_rate": 1.1760000000000001e-05,
+      "loss": 0.0539,
+      "step": 11400
+    },
+    {
+      "epoch": 1.8319999999999999,
+      "grad_norm": 5965.82666015625,
+      "learning_rate": 1.168e-05,
+      "loss": 0.0587,
+      "step": 11450
+    },
+    {
+      "epoch": 1.8399999999999999,
+      "grad_norm": 6557.6708984375,
+      "learning_rate": 1.16e-05,
+      "loss": 0.0552,
+      "step": 11500
+    },
+    {
+      "epoch": 1.8399999999999999,
+      "eval_loss": 0.08207839727401733,
+      "eval_runtime": 116.751,
+      "eval_samples_per_second": 17.13,
+      "eval_steps_per_second": 2.141,
+      "step": 11500
     }
   ],
   "logging_steps": 50,
@@ -1742,7 +1820,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.358829436928e+16,
+  "total_flos": 5.602412593152e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null