rakhman-llm commited on
Commit
bed216b
·
verified ·
1 Parent(s): 63ff26e

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:013308b4dc1251389723706bb70a4c12dd3e0f1c0451dc06722fb7fff47c38dc
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be50dd6f773aa1f48148cbd37fadbc53c1f82b20839270278b093c580fda84d
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:580eafdf89a0de8a5bf71611b2d46376bc396d69d739d11c02cf0fa1c01e5d26
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a8731c7a1753c2a934fea42dc42bc1494d508ed21f32dcb78a226e403709f0
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b375d24b4f51731ac4974850b0a2bf3cfbc153b3c9a53e800d669342a8ff2a30
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad1edd41eefae5337989de90c37428566177e0258f6ab839f3f0a3e2bcd645ce
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62b81d85aecb38c3a9a2c050795de755f75a43f89e29b6a5cc7c6ab514e2f67e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62568f1a18857cf8edd17d9d189f58f7644089636cf8dea79c190056990aaec9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08225961029529572,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-10500",
4
- "epoch": 1.6800000000000002,
5
  "eval_steps": 500,
6
- "global_step": 10500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1645,6 +1645,84 @@
1645
  "eval_samples_per_second": 17.114,
1646
  "eval_steps_per_second": 2.139,
1647
  "step": 10500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1648
  }
1649
  ],
1650
  "logging_steps": 50,
@@ -1664,7 +1742,7 @@
1664
  "attributes": {}
1665
  }
1666
  },
1667
- "total_flos": 5.115246280704e+16,
1668
  "train_batch_size": 8,
1669
  "trial_name": null,
1670
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08220627158880234,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
4
+ "epoch": 1.76,
5
  "eval_steps": 500,
6
+ "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1645
  "eval_samples_per_second": 17.114,
1646
  "eval_steps_per_second": 2.139,
1647
  "step": 10500
1648
+ },
1649
+ {
1650
+ "epoch": 1.688,
1651
+ "grad_norm": 5443.7119140625,
1652
+ "learning_rate": 1.3120000000000001e-05,
1653
+ "loss": 0.0554,
1654
+ "step": 10550
1655
+ },
1656
+ {
1657
+ "epoch": 1.696,
1658
+ "grad_norm": 6497.8408203125,
1659
+ "learning_rate": 1.304e-05,
1660
+ "loss": 0.057,
1661
+ "step": 10600
1662
+ },
1663
+ {
1664
+ "epoch": 1.704,
1665
+ "grad_norm": 5618.49853515625,
1666
+ "learning_rate": 1.296e-05,
1667
+ "loss": 0.0498,
1668
+ "step": 10650
1669
+ },
1670
+ {
1671
+ "epoch": 1.712,
1672
+ "grad_norm": 7447.96728515625,
1673
+ "learning_rate": 1.288e-05,
1674
+ "loss": 0.0568,
1675
+ "step": 10700
1676
+ },
1677
+ {
1678
+ "epoch": 1.72,
1679
+ "grad_norm": 8283.306640625,
1680
+ "learning_rate": 1.2800000000000001e-05,
1681
+ "loss": 0.0566,
1682
+ "step": 10750
1683
+ },
1684
+ {
1685
+ "epoch": 1.728,
1686
+ "grad_norm": 7497.0419921875,
1687
+ "learning_rate": 1.272e-05,
1688
+ "loss": 0.0502,
1689
+ "step": 10800
1690
+ },
1691
+ {
1692
+ "epoch": 1.736,
1693
+ "grad_norm": 8445.2421875,
1694
+ "learning_rate": 1.2640000000000001e-05,
1695
+ "loss": 0.0562,
1696
+ "step": 10850
1697
+ },
1698
+ {
1699
+ "epoch": 1.744,
1700
+ "grad_norm": 15980.0498046875,
1701
+ "learning_rate": 1.2560000000000002e-05,
1702
+ "loss": 0.0588,
1703
+ "step": 10900
1704
+ },
1705
+ {
1706
+ "epoch": 1.752,
1707
+ "grad_norm": 5444.55615234375,
1708
+ "learning_rate": 1.2479999999999999e-05,
1709
+ "loss": 0.0564,
1710
+ "step": 10950
1711
+ },
1712
+ {
1713
+ "epoch": 1.76,
1714
+ "grad_norm": 7009.3037109375,
1715
+ "learning_rate": 1.24e-05,
1716
+ "loss": 0.0549,
1717
+ "step": 11000
1718
+ },
1719
+ {
1720
+ "epoch": 1.76,
1721
+ "eval_loss": 0.08220627158880234,
1722
+ "eval_runtime": 116.957,
1723
+ "eval_samples_per_second": 17.1,
1724
+ "eval_steps_per_second": 2.138,
1725
+ "step": 11000
1726
  }
1727
  ],
1728
  "logging_steps": 50,
 
1742
  "attributes": {}
1743
  }
1744
  },
1745
+ "total_flos": 5.358829436928e+16,
1746
  "train_batch_size": 8,
1747
  "trial_name": null,
1748
  "trial_params": null