Training in progress, step 11500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 891558696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef77a091d99ff91eba75355ced068d82754fe09197e0fe3fb0024d4681e880cd
|
3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1783272762
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13d127c8a77b3a6f121a1e1766bf489c05dc85af0fb609f9d0474c53a6bbc073
|
3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:657e8f4565cffd7e4abf75f56b2fdcd3ae235671ae9b5c722c485957c12a53d9
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41e2a369098bdc7bbbac32b35b2de9650fe514b2c352f9f563c5554da15cddf2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1723,6 +1723,84 @@
|
|
1723 |
"eval_samples_per_second": 17.1,
|
1724 |
"eval_steps_per_second": 2.138,
|
1725 |
"step": 11000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1726 |
}
|
1727 |
],
|
1728 |
"logging_steps": 50,
|
@@ -1742,7 +1820,7 @@
|
|
1742 |
"attributes": {}
|
1743 |
}
|
1744 |
},
|
1745 |
-
"total_flos": 5.
|
1746 |
"train_batch_size": 8,
|
1747 |
"trial_name": null,
|
1748 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.08207839727401733,
|
3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-11500",
|
4 |
+
"epoch": 1.8399999999999999,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 11500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1723 |
"eval_samples_per_second": 17.1,
|
1724 |
"eval_steps_per_second": 2.138,
|
1725 |
"step": 11000
|
1726 |
+
},
|
1727 |
+
{
|
1728 |
+
"epoch": 1.768,
|
1729 |
+
"grad_norm": 5123.0029296875,
|
1730 |
+
"learning_rate": 1.232e-05,
|
1731 |
+
"loss": 0.0562,
|
1732 |
+
"step": 11050
|
1733 |
+
},
|
1734 |
+
{
|
1735 |
+
"epoch": 1.776,
|
1736 |
+
"grad_norm": 7975.41064453125,
|
1737 |
+
"learning_rate": 1.224e-05,
|
1738 |
+
"loss": 0.0515,
|
1739 |
+
"step": 11100
|
1740 |
+
},
|
1741 |
+
{
|
1742 |
+
"epoch": 1.784,
|
1743 |
+
"grad_norm": 5846.47705078125,
|
1744 |
+
"learning_rate": 1.216e-05,
|
1745 |
+
"loss": 0.054,
|
1746 |
+
"step": 11150
|
1747 |
+
},
|
1748 |
+
{
|
1749 |
+
"epoch": 1.792,
|
1750 |
+
"grad_norm": 7158.12109375,
|
1751 |
+
"learning_rate": 1.2080000000000001e-05,
|
1752 |
+
"loss": 0.0577,
|
1753 |
+
"step": 11200
|
1754 |
+
},
|
1755 |
+
{
|
1756 |
+
"epoch": 1.8,
|
1757 |
+
"grad_norm": 5405.5224609375,
|
1758 |
+
"learning_rate": 1.2e-05,
|
1759 |
+
"loss": 0.0538,
|
1760 |
+
"step": 11250
|
1761 |
+
},
|
1762 |
+
{
|
1763 |
+
"epoch": 1.808,
|
1764 |
+
"grad_norm": 7155.9677734375,
|
1765 |
+
"learning_rate": 1.192e-05,
|
1766 |
+
"loss": 0.0539,
|
1767 |
+
"step": 11300
|
1768 |
+
},
|
1769 |
+
{
|
1770 |
+
"epoch": 1.8159999999999998,
|
1771 |
+
"grad_norm": 6886.369140625,
|
1772 |
+
"learning_rate": 1.184e-05,
|
1773 |
+
"loss": 0.0565,
|
1774 |
+
"step": 11350
|
1775 |
+
},
|
1776 |
+
{
|
1777 |
+
"epoch": 1.8239999999999998,
|
1778 |
+
"grad_norm": 7139.15283203125,
|
1779 |
+
"learning_rate": 1.1760000000000001e-05,
|
1780 |
+
"loss": 0.0539,
|
1781 |
+
"step": 11400
|
1782 |
+
},
|
1783 |
+
{
|
1784 |
+
"epoch": 1.8319999999999999,
|
1785 |
+
"grad_norm": 5965.82666015625,
|
1786 |
+
"learning_rate": 1.168e-05,
|
1787 |
+
"loss": 0.0587,
|
1788 |
+
"step": 11450
|
1789 |
+
},
|
1790 |
+
{
|
1791 |
+
"epoch": 1.8399999999999999,
|
1792 |
+
"grad_norm": 6557.6708984375,
|
1793 |
+
"learning_rate": 1.16e-05,
|
1794 |
+
"loss": 0.0552,
|
1795 |
+
"step": 11500
|
1796 |
+
},
|
1797 |
+
{
|
1798 |
+
"epoch": 1.8399999999999999,
|
1799 |
+
"eval_loss": 0.08207839727401733,
|
1800 |
+
"eval_runtime": 116.751,
|
1801 |
+
"eval_samples_per_second": 17.13,
|
1802 |
+
"eval_steps_per_second": 2.141,
|
1803 |
+
"step": 11500
|
1804 |
}
|
1805 |
],
|
1806 |
"logging_steps": 50,
|
|
|
1820 |
"attributes": {}
|
1821 |
}
|
1822 |
},
|
1823 |
+
"total_flos": 5.602412593152e+16,
|
1824 |
"train_batch_size": 8,
|
1825 |
"trial_name": null,
|
1826 |
"trial_params": null
|