ryanmarten commited on
Commit
dcccba6
·
verified ·
1 Parent(s): d2e6e2f

Training in progress, epoch 3

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7084ec7d9ce330d83f5a4e6297e747ef4b00f4eb012aa0bdc2ff09eeefd3ec8e
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b378a1f291a4c58b487cb2884a47ce2c6faf5f394d85073f242df6368bc49e1e
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85569831145490912ed3c505b0c065606aa72c679894d9d93581137a06f194b4
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3de52b6646fa02ea78ecc3dedcab393252510c01fa11bb37872658835c2e3714
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8230b170f1e59b604bd34712f8706cc677c930004b39e83f8d6bc20a5f6489e5
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f642f6c86b69ae4d688009b5eb499caf99fbf9e6fffe290747acfd54c33224c
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:942140ed438a19bc1cbccce4d0dce776bdb108dfacf40f9884e48f93155f8749
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc7f210c63b8db5cafc01d6b6782b62fa9fb71147ff312d166ac8664b6a2c12d
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -1886,3 +1886,123 @@
1886
  {"current_steps": 988, "total_steps": 1230, "loss": 0.1317, "lr": 4.534166308795815e-06, "epoch": 4.008101265822785, "percentage": 80.33, "elapsed_time": "23:10:38", "remaining_time": "5:40:37"}
1887
  {"current_steps": 899, "total_steps": 1230, "loss": 0.1638, "lr": 8.193841711079775e-06, "epoch": 3.6460759493670887, "percentage": 73.09, "elapsed_time": "21:00:42", "remaining_time": "7:44:10"}
1888
  {"current_steps": 989, "total_steps": 1230, "loss": 0.1346, "lr": 4.498240773728859e-06, "epoch": 4.012151898734177, "percentage": 80.41, "elapsed_time": "23:12:06", "remaining_time": "5:39:13"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1886
  {"current_steps": 988, "total_steps": 1230, "loss": 0.1317, "lr": 4.534166308795815e-06, "epoch": 4.008101265822785, "percentage": 80.33, "elapsed_time": "23:10:38", "remaining_time": "5:40:37"}
1887
  {"current_steps": 899, "total_steps": 1230, "loss": 0.1638, "lr": 8.193841711079775e-06, "epoch": 3.6460759493670887, "percentage": 73.09, "elapsed_time": "21:00:42", "remaining_time": "7:44:10"}
1888
  {"current_steps": 989, "total_steps": 1230, "loss": 0.1346, "lr": 4.498240773728859e-06, "epoch": 4.012151898734177, "percentage": 80.41, "elapsed_time": "23:12:06", "remaining_time": "5:39:13"}
1889
+ {"current_steps": 900, "total_steps": 1230, "loss": 0.1706, "lr": 8.148074952982828e-06, "epoch": 3.6501265822784807, "percentage": 73.17, "elapsed_time": "21:02:05", "remaining_time": "7:42:45"}
1890
+ {"current_steps": 990, "total_steps": 1230, "loss": 0.1227, "lr": 4.462440087699609e-06, "epoch": 4.01620253164557, "percentage": 80.49, "elapsed_time": "23:13:19", "remaining_time": "5:37:46"}
1891
+ {"current_steps": 901, "total_steps": 1230, "loss": 0.1709, "lr": 8.102403648660859e-06, "epoch": 3.6541772151898733, "percentage": 73.25, "elapsed_time": "21:03:24", "remaining_time": "7:41:20"}
1892
+ {"current_steps": 991, "total_steps": 1230, "loss": 0.122, "lr": 4.426764539041861e-06, "epoch": 4.020253164556962, "percentage": 80.57, "elapsed_time": "23:14:45", "remaining_time": "5:36:22"}
1893
+ {"current_steps": 902, "total_steps": 1230, "loss": 0.16, "lr": 8.056828165944282e-06, "epoch": 3.6582278481012658, "percentage": 73.33, "elapsed_time": "21:04:43", "remaining_time": "7:39:54"}
1894
+ {"current_steps": 992, "total_steps": 1230, "loss": 0.1175, "lr": 4.391214415081582e-06, "epoch": 4.024303797468354, "percentage": 80.65, "elapsed_time": "23:16:05", "remaining_time": "5:34:57"}
1895
+ {"current_steps": 903, "total_steps": 1230, "loss": 0.178, "lr": 8.011348871891762e-06, "epoch": 3.6622784810126583, "percentage": 73.41, "elapsed_time": "21:06:07", "remaining_time": "7:38:29"}
1896
+ {"current_steps": 993, "total_steps": 1230, "loss": 0.1403, "lr": 4.355790002134579e-06, "epoch": 4.028354430379747, "percentage": 80.73, "elapsed_time": "23:17:34", "remaining_time": "5:33:33"}
1897
+ {"current_steps": 904, "total_steps": 1230, "loss": 0.1702, "lr": 7.965966132787287e-06, "epoch": 3.666329113924051, "percentage": 73.5, "elapsed_time": "21:07:35", "remaining_time": "7:37:07"}
1898
+ {"current_steps": 994, "total_steps": 1230, "loss": 0.126, "lr": 4.320491585504207e-06, "epoch": 4.032405063291139, "percentage": 80.81, "elapsed_time": "23:19:03", "remaining_time": "5:32:10"}
1899
+ {"current_steps": 905, "total_steps": 1230, "loss": 0.1605, "lr": 7.920680314137189e-06, "epoch": 3.670379746835443, "percentage": 73.58, "elapsed_time": "21:08:40", "remaining_time": "7:35:36"}
1900
+ {"current_steps": 995, "total_steps": 1230, "loss": 0.1483, "lr": 4.2853194494790615e-06, "epoch": 4.036455696202531, "percentage": 80.89, "elapsed_time": "23:20:28", "remaining_time": "5:30:45"}
1901
+ {"current_steps": 906, "total_steps": 1230, "loss": 0.148, "lr": 7.875491780667246e-06, "epoch": 3.6744303797468354, "percentage": 73.66, "elapsed_time": "21:09:55", "remaining_time": "7:34:08"}
1902
+ {"current_steps": 996, "total_steps": 1230, "loss": 0.1389, "lr": 4.250273877330691e-06, "epoch": 4.040506329113924, "percentage": 80.98, "elapsed_time": "23:21:56", "remaining_time": "5:29:22"}
1903
+ {"current_steps": 907, "total_steps": 1230, "loss": 0.1691, "lr": 7.830400896319667e-06, "epoch": 3.678481012658228, "percentage": 73.74, "elapsed_time": "21:11:11", "remaining_time": "7:32:41"}
1904
+ {"current_steps": 997, "total_steps": 1230, "loss": 0.1125, "lr": 4.215355151311313e-06, "epoch": 4.044556962025316, "percentage": 81.06, "elapsed_time": "23:23:11", "remaining_time": "5:27:55"}
1905
+ {"current_steps": 908, "total_steps": 1230, "loss": 0.159, "lr": 7.785408024250259e-06, "epoch": 3.68253164556962, "percentage": 73.82, "elapsed_time": "21:12:31", "remaining_time": "7:31:16"}
1906
+ {"current_steps": 998, "total_steps": 1230, "loss": 0.1367, "lr": 4.180563552651542e-06, "epoch": 4.048607594936709, "percentage": 81.14, "elapsed_time": "23:24:32", "remaining_time": "5:26:30"}
1907
+ {"current_steps": 909, "total_steps": 1230, "loss": 0.1808, "lr": 7.74051352682542e-06, "epoch": 3.6865822784810125, "percentage": 73.9, "elapsed_time": "21:13:58", "remaining_time": "7:29:53"}
1908
+ {"current_steps": 999, "total_steps": 1230, "loss": 0.1258, "lr": 4.145899361558147e-06, "epoch": 4.052658227848101, "percentage": 81.22, "elapsed_time": "23:25:40", "remaining_time": "5:25:02"}
1909
+ {"current_steps": 910, "total_steps": 1230, "loss": 0.1565, "lr": 7.695717765619257e-06, "epoch": 3.690632911392405, "percentage": 73.98, "elapsed_time": "21:15:04", "remaining_time": "7:28:22"}
1910
+ {"current_steps": 1000, "total_steps": 1230, "loss": 0.1355, "lr": 4.111362857211738e-06, "epoch": 4.056708860759493, "percentage": 81.3, "elapsed_time": "23:27:07", "remaining_time": "5:23:38"}
1911
+ {"current_steps": 911, "total_steps": 1230, "loss": 0.1591, "lr": 7.651021101410673e-06, "epoch": 3.6946835443037975, "percentage": 74.07, "elapsed_time": "21:16:15", "remaining_time": "7:26:54"}
1912
+ {"current_steps": 1001, "total_steps": 1230, "loss": 0.1356, "lr": 4.076954317764592e-06, "epoch": 4.060759493670886, "percentage": 81.38, "elapsed_time": "23:28:40", "remaining_time": "5:22:15"}
1913
+ {"current_steps": 912, "total_steps": 1230, "loss": 0.1708, "lr": 7.606423894180464e-06, "epoch": 3.69873417721519, "percentage": 74.15, "elapsed_time": "21:17:32", "remaining_time": "7:25:27"}
1914
+ {"current_steps": 913, "total_steps": 1230, "loss": 0.1831, "lr": 7.56192650310839e-06, "epoch": 3.702784810126582, "percentage": 74.23, "elapsed_time": "21:18:56", "remaining_time": "7:24:03"}
1915
+ {"current_steps": 1002, "total_steps": 1230, "loss": 0.153, "lr": 4.042674020338335e-06, "epoch": 4.0648101265822785, "percentage": 81.46, "elapsed_time": "23:30:25", "remaining_time": "5:20:56"}
1916
+ {"current_steps": 914, "total_steps": 1230, "loss": 0.1815, "lr": 7.517529286570349e-06, "epoch": 3.7068354430379746, "percentage": 74.31, "elapsed_time": "21:20:19", "remaining_time": "7:22:38"}
1917
+ {"current_steps": 1003, "total_steps": 1230, "loss": 0.1239, "lr": 4.0085222410217835e-06, "epoch": 4.0688607594936705, "percentage": 81.54, "elapsed_time": "23:31:31", "remaining_time": "5:19:27"}
1918
+ {"current_steps": 915, "total_steps": 1230, "loss": 0.1905, "lr": 7.473232602135387e-06, "epoch": 3.710886075949367, "percentage": 74.39, "elapsed_time": "21:21:56", "remaining_time": "7:21:19"}
1919
+ {"current_steps": 1004, "total_steps": 1230, "loss": 0.1427, "lr": 3.974499254868674e-06, "epoch": 4.0729113924050635, "percentage": 81.63, "elapsed_time": "23:33:08", "remaining_time": "5:18:05"}
1920
+ {"current_steps": 1005, "total_steps": 1230, "loss": 0.1355, "lr": 3.940605335895451e-06, "epoch": 4.076962025316456, "percentage": 81.71, "elapsed_time": "23:34:32", "remaining_time": "5:16:41"}
1921
+ {"current_steps": 916, "total_steps": 1230, "loss": 0.17, "lr": 7.429036806562935e-06, "epoch": 3.714936708860759, "percentage": 74.47, "elapsed_time": "21:23:30", "remaining_time": "7:19:58"}
1922
+ {"current_steps": 1006, "total_steps": 1230, "loss": 0.1229, "lr": 3.90684075707908e-06, "epoch": 4.0810126582278485, "percentage": 81.79, "elapsed_time": "23:35:35", "remaining_time": "5:15:12"}
1923
+ {"current_steps": 917, "total_steps": 1230, "loss": 0.1687, "lr": 7.3849422557998455e-06, "epoch": 3.7189873417721517, "percentage": 74.55, "elapsed_time": "21:24:41", "remaining_time": "7:18:30"}
1924
+ {"current_steps": 918, "total_steps": 1230, "loss": 0.1774, "lr": 7.340949304977567e-06, "epoch": 3.7230379746835442, "percentage": 74.63, "elapsed_time": "21:25:58", "remaining_time": "7:17:03"}
1925
+ {"current_steps": 1007, "total_steps": 1230, "loss": 0.1195, "lr": 3.8732057903548505e-06, "epoch": 4.085063291139241, "percentage": 81.87, "elapsed_time": "23:37:07", "remaining_time": "5:13:49"}
1926
+ {"current_steps": 1008, "total_steps": 1230, "loss": 0.1353, "lr": 3.8397007066141375e-06, "epoch": 4.089113924050633, "percentage": 81.95, "elapsed_time": "23:38:28", "remaining_time": "5:12:24"}
1927
+ {"current_steps": 919, "total_steps": 1230, "loss": 0.1621, "lr": 7.297058308409282e-06, "epoch": 3.7270886075949368, "percentage": 74.72, "elapsed_time": "21:27:22", "remaining_time": "7:15:39"}
1928
+ {"current_steps": 1009, "total_steps": 1230, "loss": 0.132, "lr": 3.806325775702304e-06, "epoch": 4.093164556962026, "percentage": 82.03, "elapsed_time": "23:39:47", "remaining_time": "5:10:58"}
1929
+ {"current_steps": 920, "total_steps": 1230, "loss": 0.191, "lr": 7.25326961958704e-06, "epoch": 3.7311392405063293, "percentage": 74.8, "elapsed_time": "21:28:47", "remaining_time": "7:14:16"}
1930
+ {"current_steps": 1010, "total_steps": 1230, "loss": 0.1331, "lr": 3.773081266416434e-06, "epoch": 4.097215189873418, "percentage": 82.11, "elapsed_time": "23:41:09", "remaining_time": "5:09:33"}
1931
+ {"current_steps": 921, "total_steps": 1230, "loss": 0.1665, "lr": 7.209583591178921e-06, "epoch": 3.7351898734177214, "percentage": 74.88, "elapsed_time": "21:30:13", "remaining_time": "7:12:52"}
1932
+ {"current_steps": 1011, "total_steps": 1230, "loss": 0.1454, "lr": 3.739967446503245e-06, "epoch": 4.10126582278481, "percentage": 82.2, "elapsed_time": "23:42:35", "remaining_time": "5:08:09"}
1933
+ {"current_steps": 922, "total_steps": 1230, "loss": 0.178, "lr": 7.1660005750261925e-06, "epoch": 3.739240506329114, "percentage": 74.96, "elapsed_time": "21:31:53", "remaining_time": "7:11:34"}
1934
+ {"current_steps": 1012, "total_steps": 1230, "loss": 0.1244, "lr": 3.706984582656894e-06, "epoch": 4.105316455696203, "percentage": 82.28, "elapsed_time": "23:44:03", "remaining_time": "5:06:45"}
1935
+ {"current_steps": 923, "total_steps": 1230, "loss": 0.1686, "lr": 7.1225209221404765e-06, "epoch": 3.7432911392405064, "percentage": 75.04, "elapsed_time": "21:33:07", "remaining_time": "7:10:06"}
1936
+ {"current_steps": 924, "total_steps": 1230, "loss": 0.1758, "lr": 7.079144982700909e-06, "epoch": 3.747341772151899, "percentage": 75.12, "elapsed_time": "21:34:32", "remaining_time": "7:08:42"}
1937
+ {"current_steps": 1013, "total_steps": 1230, "loss": 0.1504, "lr": 3.6741329405168237e-06, "epoch": 4.109367088607595, "percentage": 82.36, "elapsed_time": "23:45:48", "remaining_time": "5:05:25"}
1938
+ {"current_steps": 1014, "total_steps": 1230, "loss": 0.1209, "lr": 3.641412784665648e-06, "epoch": 4.113417721518988, "percentage": 82.44, "elapsed_time": "23:47:09", "remaining_time": "5:04:00"}
1939
+ {"current_steps": 925, "total_steps": 1230, "loss": 0.2024, "lr": 7.0358731060513695e-06, "epoch": 3.7513924050632914, "percentage": 75.2, "elapsed_time": "21:36:05", "remaining_time": "7:07:21"}
1940
+ {"current_steps": 926, "total_steps": 1230, "loss": 0.1639, "lr": 6.99270564069757e-06, "epoch": 3.7554430379746835, "percentage": 75.28, "elapsed_time": "21:37:31", "remaining_time": "7:05:58"}
1941
+ {"current_steps": 1015, "total_steps": 1230, "loss": 0.1353, "lr": 3.608824378627005e-06, "epoch": 4.11746835443038, "percentage": 82.52, "elapsed_time": "23:48:40", "remaining_time": "5:02:37"}
1942
+ {"current_steps": 1016, "total_steps": 1230, "loss": 0.1125, "lr": 3.5763679848634337e-06, "epoch": 4.121518987341772, "percentage": 82.6, "elapsed_time": "23:49:45", "remaining_time": "5:01:09"}
1943
+ {"current_steps": 927, "total_steps": 1230, "loss": 0.1665, "lr": 6.949642934304375e-06, "epoch": 3.759493670886076, "percentage": 75.37, "elapsed_time": "21:38:48", "remaining_time": "7:04:31"}
1944
+ {"current_steps": 1017, "total_steps": 1230, "loss": 0.1379, "lr": 3.544043864774269e-06, "epoch": 4.125569620253165, "percentage": 82.68, "elapsed_time": "23:50:59", "remaining_time": "4:59:42"}
1945
+ {"current_steps": 928, "total_steps": 1230, "loss": 0.1898, "lr": 6.906685333692871e-06, "epoch": 3.7635443037974685, "percentage": 75.45, "elapsed_time": "21:40:19", "remaining_time": "7:03:10"}
1946
+ {"current_steps": 1018, "total_steps": 1230, "loss": 0.1532, "lr": 3.5118522786935282e-06, "epoch": 4.129620253164557, "percentage": 82.76, "elapsed_time": "23:52:27", "remaining_time": "4:58:18"}
1947
+ {"current_steps": 929, "total_steps": 1230, "loss": 0.1667, "lr": 6.86383318483769e-06, "epoch": 3.7675949367088606, "percentage": 75.53, "elapsed_time": "21:41:37", "remaining_time": "7:01:43"}
1948
+ {"current_steps": 1019, "total_steps": 1230, "loss": 0.1084, "lr": 3.479793485887819e-06, "epoch": 4.133670886075949, "percentage": 82.85, "elapsed_time": "23:53:33", "remaining_time": "4:56:50"}
1949
+ {"current_steps": 930, "total_steps": 1230, "loss": 0.1738, "lr": 6.821086832864139e-06, "epoch": 3.771645569620253, "percentage": 75.61, "elapsed_time": "21:42:59", "remaining_time": "7:00:19"}
1950
+ {"current_steps": 931, "total_steps": 1230, "loss": 0.1732, "lr": 6.77844662204546e-06, "epoch": 3.7756962025316456, "percentage": 75.69, "elapsed_time": "21:44:14", "remaining_time": "6:58:52"}
1951
+ {"current_steps": 932, "total_steps": 1230, "loss": 0.1901, "lr": 6.7359128958000455e-06, "epoch": 3.779746835443038, "percentage": 75.77, "elapsed_time": "21:46:12", "remaining_time": "6:57:38"}
1952
+ {"current_steps": 933, "total_steps": 1230, "loss": 0.1589, "lr": 6.693485996688695e-06, "epoch": 3.7837974683544306, "percentage": 75.85, "elapsed_time": "21:47:25", "remaining_time": "6:56:11"}
1953
+ {"current_steps": 934, "total_steps": 1230, "loss": 0.1643, "lr": 6.651166266411801e-06, "epoch": 3.7878481012658227, "percentage": 75.93, "elapsed_time": "21:48:43", "remaining_time": "6:54:45"}
1954
+ {"current_steps": 935, "total_steps": 1230, "loss": 0.1781, "lr": 6.6089540458066725e-06, "epoch": 3.7918987341772152, "percentage": 76.02, "elapsed_time": "21:50:13", "remaining_time": "6:53:23"}
1955
+ {"current_steps": 936, "total_steps": 1230, "loss": 0.1715, "lr": 6.566849674844711e-06, "epoch": 3.7959493670886078, "percentage": 76.1, "elapsed_time": "21:51:29", "remaining_time": "6:51:56"}
1956
+ {"current_steps": 937, "total_steps": 1230, "loss": 0.1837, "lr": 6.524853492628747e-06, "epoch": 3.8, "percentage": 76.18, "elapsed_time": "21:53:04", "remaining_time": "6:50:35"}
1957
+ {"current_steps": 938, "total_steps": 1230, "loss": 0.1553, "lr": 6.4829658373902536e-06, "epoch": 3.8040506329113923, "percentage": 76.26, "elapsed_time": "21:54:31", "remaining_time": "6:49:12"}
1958
+ {"current_steps": 939, "total_steps": 1230, "loss": 0.1703, "lr": 6.441187046486648e-06, "epoch": 3.808101265822785, "percentage": 76.34, "elapsed_time": "21:55:50", "remaining_time": "6:47:47"}
1959
+ {"current_steps": 940, "total_steps": 1230, "loss": 0.1634, "lr": 6.399517456398567e-06, "epoch": 3.8121518987341774, "percentage": 76.42, "elapsed_time": "21:57:00", "remaining_time": "6:46:18"}
1960
+ {"current_steps": 941, "total_steps": 1230, "loss": 0.1641, "lr": 6.357957402727164e-06, "epoch": 3.81620253164557, "percentage": 76.5, "elapsed_time": "21:58:24", "remaining_time": "6:44:54"}
1961
+ {"current_steps": 942, "total_steps": 1230, "loss": 0.1583, "lr": 6.316507220191395e-06, "epoch": 3.820253164556962, "percentage": 76.59, "elapsed_time": "21:59:41", "remaining_time": "6:43:28"}
1962
+ {"current_steps": 943, "total_steps": 1230, "loss": 0.1609, "lr": 6.275167242625331e-06, "epoch": 3.8243037974683545, "percentage": 76.67, "elapsed_time": "22:00:50", "remaining_time": "6:41:59"}
1963
+ {"current_steps": 944, "total_steps": 1230, "loss": 0.1587, "lr": 6.233937802975471e-06, "epoch": 3.828354430379747, "percentage": 76.75, "elapsed_time": "22:02:06", "remaining_time": "6:40:33"}
1964
+ {"current_steps": 945, "total_steps": 1230, "loss": 0.1783, "lr": 6.192819233298046e-06, "epoch": 3.832405063291139, "percentage": 76.83, "elapsed_time": "22:03:27", "remaining_time": "6:39:08"}
1965
+ {"current_steps": 946, "total_steps": 1230, "loss": 0.1682, "lr": 6.151811864756383e-06, "epoch": 3.8364556962025316, "percentage": 76.91, "elapsed_time": "22:04:52", "remaining_time": "6:37:44"}
1966
+ {"current_steps": 947, "total_steps": 1230, "loss": 0.163, "lr": 6.1109160276181655e-06, "epoch": 3.840506329113924, "percentage": 76.99, "elapsed_time": "22:06:16", "remaining_time": "6:36:20"}
1967
+ {"current_steps": 948, "total_steps": 1230, "loss": 0.1744, "lr": 6.070132051252868e-06, "epoch": 3.8445569620253166, "percentage": 77.07, "elapsed_time": "22:07:47", "remaining_time": "6:34:58"}
1968
+ {"current_steps": 949, "total_steps": 1230, "loss": 0.1722, "lr": 6.0294602641290034e-06, "epoch": 3.848607594936709, "percentage": 77.15, "elapsed_time": "22:09:12", "remaining_time": "6:33:34"}
1969
+ {"current_steps": 950, "total_steps": 1230, "loss": 0.1641, "lr": 5.988900993811575e-06, "epoch": 3.852658227848101, "percentage": 77.24, "elapsed_time": "22:10:31", "remaining_time": "6:32:09"}
1970
+ {"current_steps": 951, "total_steps": 1230, "loss": 0.1541, "lr": 5.948454566959363e-06, "epoch": 3.8567088607594937, "percentage": 77.32, "elapsed_time": "22:11:47", "remaining_time": "6:30:42"}
1971
+ {"current_steps": 952, "total_steps": 1230, "loss": 0.1741, "lr": 5.908121309322328e-06, "epoch": 3.8607594936708862, "percentage": 77.4, "elapsed_time": "22:13:18", "remaining_time": "6:29:20"}
1972
+ {"current_steps": 953, "total_steps": 1230, "loss": 0.1601, "lr": 5.867901545738976e-06, "epoch": 3.8648101265822783, "percentage": 77.48, "elapsed_time": "22:14:37", "remaining_time": "6:27:55"}
1973
+ {"current_steps": 954, "total_steps": 1230, "loss": 0.1616, "lr": 5.827795600133774e-06, "epoch": 3.868860759493671, "percentage": 77.56, "elapsed_time": "22:15:53", "remaining_time": "6:26:29"}
1974
+ {"current_steps": 955, "total_steps": 1230, "loss": 0.1914, "lr": 5.787803795514466e-06, "epoch": 3.8729113924050633, "percentage": 77.64, "elapsed_time": "22:17:24", "remaining_time": "6:25:06"}
1975
+ {"current_steps": 956, "total_steps": 1230, "loss": 0.1618, "lr": 5.747926453969576e-06, "epoch": 3.876962025316456, "percentage": 77.72, "elapsed_time": "22:18:44", "remaining_time": "6:23:41"}
1976
+ {"current_steps": 957, "total_steps": 1230, "loss": 0.1726, "lr": 5.708163896665708e-06, "epoch": 3.8810126582278484, "percentage": 77.8, "elapsed_time": "22:20:08", "remaining_time": "6:22:17"}
1977
+ {"current_steps": 958, "total_steps": 1230, "loss": 0.1732, "lr": 5.668516443845047e-06, "epoch": 3.8850632911392404, "percentage": 77.89, "elapsed_time": "22:21:32", "remaining_time": "6:20:53"}
1978
+ {"current_steps": 959, "total_steps": 1230, "loss": 0.1862, "lr": 5.6289844148227225e-06, "epoch": 3.889113924050633, "percentage": 77.97, "elapsed_time": "22:23:05", "remaining_time": "6:19:32"}
1979
+ {"current_steps": 960, "total_steps": 1230, "loss": 0.1918, "lr": 5.5895681279842615e-06, "epoch": 3.8931645569620255, "percentage": 78.05, "elapsed_time": "22:24:39", "remaining_time": "6:18:11"}
1980
+ {"current_steps": 961, "total_steps": 1230, "loss": 0.1656, "lr": 5.550267900783019e-06, "epoch": 3.8972151898734175, "percentage": 78.13, "elapsed_time": "22:25:50", "remaining_time": "6:16:43"}
1981
+ {"current_steps": 962, "total_steps": 1230, "loss": 0.1804, "lr": 5.511084049737623e-06, "epoch": 3.90126582278481, "percentage": 78.21, "elapsed_time": "22:27:16", "remaining_time": "6:15:19"}
1982
+ {"current_steps": 963, "total_steps": 1230, "loss": 0.1706, "lr": 5.4720168904294215e-06, "epoch": 3.9053164556962026, "percentage": 78.29, "elapsed_time": "22:28:37", "remaining_time": "6:13:54"}
1983
+ {"current_steps": 964, "total_steps": 1230, "loss": 0.1622, "lr": 5.433066737499948e-06, "epoch": 3.909367088607595, "percentage": 78.37, "elapsed_time": "22:29:35", "remaining_time": "6:12:23"}
1984
+ {"current_steps": 965, "total_steps": 1230, "loss": 0.1618, "lr": 5.394233904648376e-06, "epoch": 3.9134177215189876, "percentage": 78.46, "elapsed_time": "22:30:53", "remaining_time": "6:10:58"}
1985
+ {"current_steps": 966, "total_steps": 1230, "loss": 0.164, "lr": 5.355518704628997e-06, "epoch": 3.9174683544303797, "percentage": 78.54, "elapsed_time": "22:32:08", "remaining_time": "6:09:31"}
1986
+ {"current_steps": 967, "total_steps": 1230, "loss": 0.1748, "lr": 5.316921449248731e-06, "epoch": 3.921518987341772, "percentage": 78.62, "elapsed_time": "22:33:35", "remaining_time": "6:08:08"}
1987
+ {"current_steps": 968, "total_steps": 1230, "loss": 0.1888, "lr": 5.278442449364538e-06, "epoch": 3.9255696202531647, "percentage": 78.7, "elapsed_time": "22:35:10", "remaining_time": "6:06:47"}
1988
+ {"current_steps": 969, "total_steps": 1230, "loss": 0.1724, "lr": 5.240082014881016e-06, "epoch": 3.9296202531645568, "percentage": 78.78, "elapsed_time": "22:36:30", "remaining_time": "6:05:22"}
1989
+ {"current_steps": 970, "total_steps": 1230, "loss": 0.1748, "lr": 5.201840454747822e-06, "epoch": 3.9336708860759493, "percentage": 78.86, "elapsed_time": "22:37:59", "remaining_time": "6:03:59"}
1990
+ {"current_steps": 971, "total_steps": 1230, "loss": 0.1777, "lr": 5.163718076957223e-06, "epoch": 3.937721518987342, "percentage": 78.94, "elapsed_time": "22:39:25", "remaining_time": "6:02:36"}
1991
+ {"current_steps": 972, "total_steps": 1230, "loss": 0.189, "lr": 5.125715188541609e-06, "epoch": 3.9417721518987343, "percentage": 79.02, "elapsed_time": "22:40:58", "remaining_time": "6:01:14"}
1992
+ {"current_steps": 973, "total_steps": 1230, "loss": 0.1989, "lr": 5.087832095571021e-06, "epoch": 3.945822784810127, "percentage": 79.11, "elapsed_time": "22:42:42", "remaining_time": "5:59:56"}
1993
+ {"current_steps": 974, "total_steps": 1230, "loss": 0.1853, "lr": 5.0500691031506766e-06, "epoch": 3.949873417721519, "percentage": 79.19, "elapsed_time": "22:44:20", "remaining_time": "5:58:35"}
1994
+ {"current_steps": 975, "total_steps": 1230, "loss": 0.1743, "lr": 5.01242651541854e-06, "epoch": 3.9539240506329114, "percentage": 79.27, "elapsed_time": "22:46:01", "remaining_time": "5:57:15"}
1995
+ {"current_steps": 976, "total_steps": 1230, "loss": 0.1793, "lr": 4.974904635542815e-06, "epoch": 3.957974683544304, "percentage": 79.35, "elapsed_time": "22:47:23", "remaining_time": "5:55:51"}
1996
+ {"current_steps": 977, "total_steps": 1230, "loss": 0.1697, "lr": 4.937503765719582e-06, "epoch": 3.962025316455696, "percentage": 79.43, "elapsed_time": "22:48:51", "remaining_time": "5:54:28"}
1997
+ {"current_steps": 978, "total_steps": 1230, "loss": 0.172, "lr": 4.900224207170299e-06, "epoch": 3.9660759493670885, "percentage": 79.51, "elapsed_time": "22:50:06", "remaining_time": "5:53:02"}
1998
+ {"current_steps": 979, "total_steps": 1230, "loss": 0.181, "lr": 4.8630662601394065e-06, "epoch": 3.970126582278481, "percentage": 79.59, "elapsed_time": "22:51:33", "remaining_time": "5:51:38"}
1999
+ {"current_steps": 980, "total_steps": 1230, "loss": 0.1699, "lr": 4.8260302238918995e-06, "epoch": 3.9741772151898735, "percentage": 79.67, "elapsed_time": "22:52:55", "remaining_time": "5:50:14"}
2000
+ {"current_steps": 981, "total_steps": 1230, "loss": 0.1534, "lr": 4.789116396710924e-06, "epoch": 3.978227848101266, "percentage": 79.76, "elapsed_time": "22:54:02", "remaining_time": "5:48:45"}
2001
+ {"current_steps": 982, "total_steps": 1230, "loss": 0.1714, "lr": 4.752325075895368e-06, "epoch": 3.982278481012658, "percentage": 79.84, "elapsed_time": "22:55:20", "remaining_time": "5:47:20"}
2002
+ {"current_steps": 983, "total_steps": 1230, "loss": 0.1493, "lr": 4.715656557757473e-06, "epoch": 3.9863291139240506, "percentage": 79.92, "elapsed_time": "22:56:23", "remaining_time": "5:45:50"}
2003
+ {"current_steps": 984, "total_steps": 1230, "loss": 0.1796, "lr": 4.679111137620442e-06, "epoch": 3.990379746835443, "percentage": 80.0, "elapsed_time": "22:57:55", "remaining_time": "5:44:28"}
2004
+ {"current_steps": 985, "total_steps": 1230, "loss": 0.1735, "lr": 4.6426891098160585e-06, "epoch": 3.9944303797468352, "percentage": 80.08, "elapsed_time": "22:59:18", "remaining_time": "5:43:04"}
2005
+ {"current_steps": 986, "total_steps": 1230, "loss": 0.1729, "lr": 4.6063907676823474e-06, "epoch": 3.9984810126582278, "percentage": 80.16, "elapsed_time": "23:00:37", "remaining_time": "5:41:39"}
2006
+ {"current_steps": 987, "total_steps": 1230, "loss": 0.1393, "lr": 4.570216403561141e-06, "epoch": 4.004050632911392, "percentage": 80.24, "elapsed_time": "23:03:11", "remaining_time": "5:40:32"}
2007
+ {"current_steps": 988, "total_steps": 1230, "loss": 0.1318, "lr": 4.534166308795815e-06, "epoch": 4.008101265822785, "percentage": 80.33, "elapsed_time": "23:04:35", "remaining_time": "5:39:08"}
2008
+ {"current_steps": 989, "total_steps": 1230, "loss": 0.1344, "lr": 4.498240773728859e-06, "epoch": 4.012151898734177, "percentage": 80.41, "elapsed_time": "23:06:02", "remaining_time": "5:37:45"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b109346f835356baa84e067fdb9a82cdcae7d60dd3739e1d9b7b4e6b1eca559
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbfe2753ce890402118e13912d3877e6acebdd7767919752be931aea1065b060
3
  size 7288